Commit 8986060e authored by Adam Osewski's avatar Adam Osewski
Browse files

Warp raked and thread raked access pattern.

parent 70b6d031
...@@ -6,13 +6,16 @@ ...@@ -6,13 +6,16 @@
#include <iostream> #include <iostream>
#include <sstream> #include <sstream>
#include "ck/host_utility/io.hpp"
#include "ck/utility/common_header.hpp" #include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp" #include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp" #include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out_roofline.hpp" #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out_roofline_warp_raked.hpp"
// #include
// "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out_roofline_thread_raked.hpp"
// #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out.hpp" // #include "ck/tensor_operation/gpu/grid/gridwise_gemm_xdl_direct_c_write_out.hpp"
#include "ck/host_utility/device_prop.hpp" #include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp" #include "ck/host_utility/kernel_launch.hpp"
...@@ -656,7 +659,7 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout, ...@@ -656,7 +659,7 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout,
// polymorphic // polymorphic
std::string GetTypeString() const override std::string GetTypeString() const override
{ {
auto str = std::stringstream(); auto str = std::stringstream{};
std::map<LoopScheduler, std::string> LoopSchedToString{ std::map<LoopScheduler, std::string> LoopSchedToString{
{LoopScheduler::Default, "Default"}, {LoopScheduler::Interwave, "Interwave"}}; {LoopScheduler::Default, "Default"}, {LoopScheduler::Interwave, "Interwave"}};
...@@ -664,6 +667,13 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout, ...@@ -664,6 +667,13 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout,
std::map<PipelineVersion, std::string> PipelineVersionToString{{PipelineVersion::v1, "v1"}, std::map<PipelineVersion, std::string> PipelineVersionToString{{PipelineVersion::v1, "v1"},
{PipelineVersion::v2, "v2"}}; {PipelineVersion::v2, "v2"}};
auto c_thr_dst_access_order_str = std::ostringstream{};
c_thr_dst_access_order_str << "{";
ck::static_for<0, CThreadTransferDstAccessOrder::Size(), 1>{}([&](auto i) {
c_thr_dst_access_order_str << CThreadTransferDstAccessOrder::At(i).value << ", ";
});
c_thr_dst_access_order_str << "}";
// clang-format off // clang-format off
str << "DeviceGemm_Xdl_DirectCWriteOut" str << "DeviceGemm_Xdl_DirectCWriteOut"
<< "<" << "<"
...@@ -672,12 +682,15 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout, ...@@ -672,12 +682,15 @@ struct DeviceGemm_Xdl_DirectCWriteOut : public DeviceGemm<ALayout,
<< NPerBlock << ", " << NPerBlock << ", "
<< KPerBlock << ", " << KPerBlock << ", "
<< AK1 << ", " << AK1 << ", "
<< BK1 << BK1 << ", "
<< c_thr_dst_access_order_str.str() << ", "
<< CThreadTransferDstVectorDim << ", "
<< CThreadTransferDstScalarPerVector << ", "
<< ">" << ">"
<< " LoopScheduler: " << " LoopScheduler: "
<< LoopSchedToString[LoopSched] << ", " << LoopSchedToString[LoopSched] << "; "
<< "PipelineVersion: " << "PipelineVersion: "
<< PipelineVersionToString[PipelineVer];; << PipelineVersionToString[PipelineVer];
// clang-format on // clang-format on
return str.str(); return str.str();
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment