Search.setIndex({"docnames":["API_Reference_Guide","Contributors_Guide","Disclaimer","Linux_Install_Guide","Supported_Primitives_Guide","dockerhub","index","tutorial_hello_world"],"filenames":["API_Reference_Guide.rst","Contributors_Guide.rst","Disclaimer.rst","Linux_Install_Guide.rst","Supported_Primitives_Guide.rst","dockerhub.rst","index.rst","tutorial_hello_world.rst"],"titles":["<span class=\"section-number\">5. </span>API Reference Guide","<span class=\"section-number\">6. </span>Contributor\u2019s Guide","<span class=\"section-number\">7. </span>Disclaimer","<span class=\"section-number\">1. </span>Getting Started Guide","<span class=\"section-number\">4. </span>Supported Primitives Guide","<span class=\"section-number\">3. </span>CK docker hub","Composable Kernel User Guide","<span class=\"section-number\">2. </span>CK Hello world"],"terms":{"thi":[0,2,3,4,6,7],"document":[0,2,4,6],"contain":[0,2,3,4,5,7],"detail":[0,4,5,7],"compos":[0,3,4,7],"librari":[0,5,6],"introduc":0,"some":[0,7],"kei":0,"design":0,"principl":0,"ar":[0,2,5,7],"write":[0,5],"new":[0,2,4,7],"class":0,"extend":0,"function":[0,7],"section":[0,7],"describ":0,"how":[0,7],"struct":0,"store":0,"data":[0,7],"gpu":[0,5,7],"devic":[0,2,7],"memori":0,"The":[0,2,3,5,7],"algorithm":[0,4],"i":[0,2,3,4,6,7],"defin":0,"dao":0,"et":0,"al":0,"dfe":0,"22":0,"list":[0,3,7],"implement":[0,4],"gridwis":0,"templat":0,"typenam":0,"floatab":0,"floatgemmacc":0,"floatcshuffl":0,"floatc":0,"aelementwiseoper":0,"belementwiseoper":0,"accelementwiseoper":0,"b1elementwiseoper":0,"celementwiseoper":0,"inmemorydataoperationenum":0,"cglobalmemorydataoper":0,"agriddesc_ak0_m_ak1":0,"bgriddesc_bk0_n_bk1":0,"b1griddesc_bk0_n_bk1":0,"cgriddesc_m_n":0,"index_t":0,"numgemmkprefetchstag":0,"blocksiz":0,"mperblock":0,"nperblock":0,"kperblock":0,"gemm1nperblock":0,"gemm1kperblock":0,"ak1valu":0,"bk1valu":0,"b1k1valu":0,"mperxdl":0,"nperxdl":0,"mxdlperwav":0,"nxdlperwav":0,"gemm1nxdlperwav":0,"ablocktransferthreadclusterlengths_ak0_m_ak1":0,"ablocktransferthreadclusterarrangeord":0,"ablocktransfersrcaccessord":0,"ablocktransfersrcvectordim":0,"ablocktransfersrcscalarpervector":0,"ablocktransferdstscalarpervector_ak1":0,"bool":0,"athreadtransfersrcresetcoordinateafterrun":0,"ablockldsextram":0,"bblocktransferthreadclusterlengths_bk0_n_bk1":0,"bblocktransferthreadclusterarrangeord":0,"bblocktransfersrcaccessord":0,"bblocktransfersrcvectordim":0,"bblocktransfersrcscalarpervector":0,"bblocktransferdstscalarpervector_bk1":0,"bthreadtransfersrcresetcoordinateafterrun":0,"bblockldsextran":0,"b1blocktransferthreadclusterlengths_bk0_n_bk1":0,"b1blocktransferthreadclusterarrangeord":0,"b1blocktransfersrcaccessord":0,"b1blocktransfersrcvectordim":0,"b1blocktransfersrcscalarpervector":0,"b1blocktransferdstscalarpervector_bk1":0,"b1threadtransfersrcresetcoordinateafterrun":0,"b1blockldsextran":0,"cshufflemxdlperwavepershuffl":0,"cshufflenxdlperwavepershuffl":0,"cshuffleblocktransferclusterlengths_mblock_mperblock_nblock_nperblock":0,"cshuffleblocktransferscalarpervector_nperblock":0,"loopschedul":[0,7],"loopsch":0,"padn":0,"maskoutuppertriangl":0,"pipelinevers":[0,7],"pipelinev":0,"v1":[0,7],"gridwisebatchedgemmsoftmaxgemm_xdl_cshuffl":0,"gemm":0,"softmax":[0,6],"fusion":0,"blockwis":0,"threadgroup":0,"srcelementwiseoper":0,"dstelementwiseoper":0,"dstinmemop":0,"blockslicelength":0,"threadclusterlength":0,"threadclusterarrangeord":0,"srcdata":0,"dstdata":0,"srcdesc":0,"dstdesc":0,"srcdimaccessord":0,"dstdimaccessord":0,"srcvectordim":0,"dstvectordim":0,"srcscalarpervector":0,"dstscalarpervector":0,"srcscalarstrideinvector":0,"dstscalarstrideinvector":0,"threadtransfersrcresetcoordinateafterrun":0,"threadtransferdstresetcoordinateafterrun":0,"numthreadscratch":0,"1":[0,4,7],"threadgrouptensorslicetransfer_v4r1":0,"transfer":0,"version":[0,2,5,7],"doe":[0,7],"follow":[0,3,4],"thing":0,"avoid":0,"scratch":0,"issu":0,"staticallyindexedarrai":0,"instead":0,"c":[0,2,5],"arrai":0,"thread":0,"buffer":0,"threadwisetensorslicetransfer_v3":0,"keep":0,"tensor":[0,7],"descriptor":0,"run":[0,4,5,6],"construct":0,"coordin":[0,7],"floatacc":0,"atiledesc":0,"btiledesc":0,"ammatiledesc":0,"bmmatiledesc":0,"mrepeat":0,"nrepeat":0,"kpack":0,"transposec":0,"fals":0,"ammakstrid":0,"xdlopsgemm":0,"k0perxdlop":0,"bmmakstrid":0,"blockwisegemmxdlops_v2":0,"support":[0,6,7],"regular":0,"xdl":0,"output":[0,4,7],"m2_m3_m4_m2":0,"transpos":0,"m2_n2_n3_n4":0,"decoupl":0,"input":0,"tile":[0,4,7],"mma":0,"order":[0,3],"both":0,"vgpr":0,"ld":0,"sourc":0,"configur":[0,7],"k":[0,4],"index":0,"start":[0,6,7],"posit":0,"step":0,"size":[0,4],"after":0,"each":0,"fma":0,"instruct":[0,3],"accdatatyp":0,"threadmap_m_k":0,"threadclusterdesc_m_k":0,"threadslicedesc_m_k":0,"ignorenan":0,"blockwisesoftmax":0,"paramet":[0,7],"block":[0,4],"accumul":0,"type":[0,7],"id":[0,5],"m_k":0,"threadwis":0,"cluster":0,"slice":0,"flag":[0,7],"ignor":0,"nan":0,"default":[0,7],"elementwiseoper":0,"slicelength":0,"dimaccessord":0,"enable_if":0,"isknownatcompiletim":0,"threadwisetensorslicetransfer_statictostat":0,"do":[0,6,7],"NOT":0,"involv":0,"ani":[0,2],"staticbuff":0,"tri":0,"daniel":0,"y":[0,4],"fu":0,"stefano":0,"ermon":0,"atri":0,"rudra":0,"christoph":0,"r":[0,2,7],"\u00e9":0,"fast":[0,7],"effici":[0,7],"exact":0,"attent":[0,7],"io":0,"awar":0,"arxiv":0,"preprint":0,"2205":0,"14135":0,"2022":0,"todo":[1,3],"inform":2,"present":2,"purpos":[2,5],"onli":[2,7],"mai":2,"technic":[2,7],"inaccuraci":2,"omiss":2,"typograph":2,"error":2,"herein":2,"subject":2,"chang":[2,7],"render":2,"inaccur":2,"mani":[2,7],"reason":2,"includ":[2,5,7],"limit":2,"product":2,"roadmap":[2,6],"compon":[2,7],"motherboard":2,"model":[2,5,7],"releas":[2,5,7],"differ":[2,7],"between":2,"manufactur":2,"softwar":2,"bio":2,"flash":2,"firmwar":2,"upgrad":2,"like":[2,5,7],"comput":[2,4,7],"system":[2,7],"ha":[2,7],"risk":2,"secur":2,"vulner":2,"cannot":2,"complet":2,"prevent":2,"mitig":2,"assum":2,"oblig":2,"updat":[2,4],"otherwis":2,"correct":2,"revis":2,"howev":2,"reserv":2,"right":[2,6,7],"make":[2,5,7],"from":[2,4,7],"time":[2,4,7],"content":2,"hereof":2,"without":2,"notifi":2,"person":2,"provid":[2,5],"AS":2,"NO":2,"represent":2,"OR":2,"warranti":2,"WITH":2,"respect":2,"TO":2,"THE":2,"AND":2,"respons":2,"FOR":2,"THAT":2,"appear":2,"IN":2,"specif":[2,5],"impli":2,"OF":2,"non":2,"infring":2,"merchant":2,"fit":[2,7],"particular":2,"event":2,"WILL":2,"BE":2,"liabl":2,"relianc":2,"direct":2,"indirect":2,"special":2,"other":[2,7],"consequenti":2,"damag":2,"aris":2,"us":[2,3,5,6,7],"even":[2,5],"IF":2,"expressli":2,"advis":2,"possibl":[2,7],"SUCH":2,"arrow":2,"logo":2,"radeon":[2,7],"ryzen":2,"epyc":2,"combin":2,"thereof":2,"trademark":2,"advanc":2,"micro":2,"inc":2,"name":[2,5],"public":2,"identif":2,"compani":2,"googl":2,"regist":2,"llc":2,"pcie":2,"pci":2,"sig":2,"corpor":2,"linux":2,"linu":2,"torvald":2,"u":2,"countri":2,"ubuntu":[2,5,7],"canon":2,"ltd":2,"2023":2,"all":[2,5,7],"licens":[2,6],"you":[2,5,7],"directli":2,"own":2,"link":2,"A":2,"kind":2,"done":[2,7],"AT":2,"your":[2,5,7],"sole":2,"discret":2,"under":[2,5],"circumst":2,"instal":3,"contribut":3,"kernel":[3,4,7],"ck":[3,4,6],"suggest":3,"read":[3,7],"In":[4,7],"contrast":4,"api":[4,6],"refer":[4,6],"an":[4,7],"introduct":[4,6,7],"math":4,"which":[4,6,7],"underpin":4,"For":[4,5,6,7],"vector":4,"x":4,"2":[4,7],"ldot":4,"t":[4,5,7],"b":4,"we":[4,5,7],"can":[4,5,7],"decompos":4,"concaten":4,"begin":4,"align":4,"m":[4,7],"max":4,"f":4,"exp":4,"z":4,"operatornam":4,"end":[4,7],"where":4,"j":[4,5,7],"x_1":4,"x_b":4,"scalar":4,"matrix":[4,7],"t_r":4,"t_c":4,"x_":4,"ij":4,"b_r":4,"b_c":4,"row":4,"wise":4,"calcul":4,"tild":4,"_":4,"rowmax":4,"p":[4,7],"rowsum":4,"p_":4,"If":[4,5,7],"initi":[4,7],"sum":4,"first":[4,7],"column":4,"m_i":4,"i1":4,"z_i":4,"diag":4,"els":4,"_i":4,"y_":4,"ik":4,"z_":4,"reset":4,"variabl":4,"To":[5,7],"our":[5,7],"live":[5,7],"easier":[5,7],"bring":5,"depend":[5,7],"togeth":5,"recommend":5,"aim":[5,7],"program":5,"perform":[5,7],"critic":5,"machin":[5,7],"learn":[5,7],"workload":5,"across":5,"multipl":[5,7],"architectur":[5,7],"cpu":[5,7],"etc":5,"through":5,"gener":[5,7],"languag":5,"hip":5,"get":[5,6,7],"git":[5,7],"clone":[5,7],"http":[5,7],"github":[5,7],"com":[5,7],"rocmsoftwareplatform":[5,7],"composable_kernel":[5,7],"privileg":[5,7],"group":[5,7],"add":[5,7],"sudo":[5,7],"w":[5,7],"root":[5,7],"workspac":[5,7],"v":[5,7],"path_to_local_workspac":5,"rocm":[5,7],"ck_ub20":[5,7],"04_rocm5":[5,7],"3_releas":[5,7],"bin":[5,7],"bash":[5,7],"build":[5,6],"mkdir":[5,7],"cd":[5,7],"specifi":5,"target":[5,6],"exampl":[5,6],"below":5,"gfx908":[5,7],"gfx90a":[5,7],"cmake":[5,7],"d":[5,7],"cmake_prefix_path":[5,7],"opt":[5,7],"cmake_cxx_compil":[5,7],"hipcc":[5,7],"cmake_cxx_flag":[5,7],"o3":[5,7],"cmake_build_typ":[5,7],"gpu_target":[5,7],"test":[5,6],"case":[5,7],"also":[5,7],"example_gemm_xdl_fp16":[5,7],"test_gemm_fp16":[5,7],"more":[5,7],"visit":[5,7],"repo":[5,7],"have":[5,7],"everyth":[5,7],"compil":[5,7],"let":[5,7],"":[5,6,7],"take":5,"look":[5,7],"4_releas":5,"spec":5,"made":5,"ub20":5,"04":[5,7],"base":[5,7],"20":[5,7],"rocm5":5,"4":[5,7],"platform":5,"5":5,"just":5,"pick":[5,7],"project":[5,7],"re":5,"set":5,"custom":5,"stop":5,"tinker":5,"feel":5,"free":5,"adjust":[5,7],"dockerfil":5,"mit":5,"hello":6,"world":6,"motiv":6,"descript":6,"hardwar":6,"summari":6,"docker":[6,7],"hub":6,"why":6,"need":[6,7],"so":[6,7],"what":6,"And":6,"insid":[6,7],"imag":[6,7],"me":6,"dii":6,"here":[6,7],"primit":6,"datatyp":6,"devicemem":6,"flashattent":6,"contributor":6,"pull":6,"request":6,"guidelin":6,"disclaim":6,"amd":[6,7],"standard":6,"legal":6,"third":6,"parti":6,"tutori":7,"engin":7,"deal":7,"artifici":7,"intellig":7,"who":7,"would":7,"optim":7,"pipelin":7,"squeez":7,"everi":7,"drop":7,"ad":7,"approach":7,"latest":7,"doesn":7,"bleed":7,"edg":7,"featur":7,"reproduc":7,"now":7,"forev":7,"dure":7,"sai":7,"futur":7,"go":7,"depth":7,"breadth":7,"familiar":7,"tool":7,"wai":7,"integr":7,"modern":7,"ai":7,"technologi":7,"solv":7,"problem":7,"imagin":7,"field":7,"craft":7,"workflow":7,"still":7,"challeng":7,"one":7,"heavi":7,"lift":7,"collect":7,"oper":7,"creat":7,"ones":7,"requir":7,"major":7,"neural":7,"network":7,"convolut":7,"contract":7,"reduct":7,"modul":7,"varieti":7,"activ":7,"fuse":7,"almost":7,"reach":7,"speed":7,"light":7,"acceler":7,"abil":7,"layer":7,"structur":7,"transform":7,"low":7,"precis":7,"fp16":7,"bf16":7,"int8":7,"int4":7,"excit":7,"benchmark":7,"result":7,"awesom":7,"blog":7,"post":7,"fulli":7,"gfx1030":7,"check":7,"hand":7,"decid":7,"instinct":7,"mi100":7,"mi210":7,"mi250":7,"mi250x":7,"pro":7,"v620":7,"w6800":7,"w6800x":7,"duo":7,"w6900x":7,"rx":7,"6800":7,"xt":7,"6900":7,"xtx":7,"6950":7,"There":7,"cloud":7,"option":7,"find":7,"don":7,"rebas":7,"checkout":7,"tutorial_hello_world":7,"prepar":7,"necessari":7,"v5":7,"3":7,"current":7,"folder":7,"home":7,"line":7,"path":7,"navig":7,"directori":7,"previou":7,"talk":7,"about":7,"onc":7,"build_dev":7,"off":7,"went":7,"well":7,"up":7,"file":7,"been":7,"written":7,"final":7,"smooth":7,"ll":7,"see":7,"scan":7,"100":7,"built":7,"ctest":7,"n":7,"them":7,"separ":7,"execut":7,"argument":7,"mean":7,"want":7,"mode":7,"verifi":7,"matric":7,"integ":7,"plai":7,"around":7,"goe":7,"should":7,"someth":7,"a_m_k":7,"dim":7,"length":7,"3840":7,"4096":7,"stride":7,"b_k_n":7,"c_m_n":7,"launch_and_time_kernel":7,"grid_dim":7,"480":7,"block_dim":7,"256":7,"warm":7,"10":7,"perf":7,"10017":7,"117":7,"tflop":7,"87":7,"6854":7,"gb":7,"devicegemmxdl":7,"128":7,"8":7,"32":7,"numprefetch":7,"meanwhil":7,"But":7,"panic":7,"example_gemm_dl_fp16":7,"nice":7,"similar":7,"arg":7,"a_grid_desc_k0_m0_m1_k1_":7,"2048":7,"b_grid_desc_k0_n0_n1_k1_":7,"c_grid_desc_m_n_":7,"960":7,"65695":7,"35":7,"234":7,"26":7,"3797":7,"devicegemmdl":7,"16":7,"Or":7,"121":7,"pass":7,"51":7,"81":7,"sec":7,"0":7,"fail":7,"out":7,"took":7,"ran":7,"stai":7,"tune":7,"next":7,"config":7,"best":7,"task":7,"forget":7,"switch":7,"instanc":7,"launch":7,"better":7,"spend":7,"monei":7,"sure":7},"objects":{"":[[0,0,1,"_CPPv49DeviceMem","DeviceMem"],[0,0,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::AMmaKStride"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::AMmaTileDesc"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::ATileDesc"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::BMmaKStride"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::BMmaTileDesc"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::BTileDesc"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::BlockSize"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::FloatAB"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::FloatAcc"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::KPack"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::KPerBlock"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::MPerBlock"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::MPerXDL"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::MRepeat"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::NPerBlock"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::NPerXDL"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::NRepeat"],[0,1,1,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E","ck::BlockwiseGemmXdlops_v2::TransposeC"],[0,0,1,"_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE","ck::BlockwiseSoftmax"],[0,1,1,"_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE","ck::BlockwiseSoftmax::AccDataType"],[0,1,1,"_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE","ck::BlockwiseSoftmax::BlockSize"],[0,1,1,"_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE","ck::BlockwiseSoftmax::IgnoreNaN"],[0,1,1,"_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE","ck::BlockwiseSoftmax::ThreadClusterDesc_M_K"],[0,1,1,"_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE","ck::BlockwiseSoftmax::ThreadMap_M_K"],[0,1,1,"_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE","ck::BlockwiseSoftmax::ThreadSliceDesc_M_K"],[0,0,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockLdsExtraM"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferDstScalarPerVector_AK1"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferSrcAccessOrder"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferSrcScalarPerVector"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferSrcVectorDim"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferThreadClusterArrangeOrder"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferThreadClusterLengths_AK0_M_AK1"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AElementwiseOperation"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AGridDesc_AK0_M_AK1"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AK1Value"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AThreadTransferSrcResetCoordinateAfterRun"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AccElementwiseOperation"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockLdsExtraN"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferDstScalarPerVector_BK1"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferSrcAccessOrder"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferSrcScalarPerVector"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferSrcVectorDim"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferThreadClusterArrangeOrder"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferThreadClusterLengths_BK0_N_BK1"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1ElementwiseOperation"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1GridDesc_BK0_N_BK1"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1K1Value"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1ThreadTransferSrcResetCoordinateAfterRun"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockLdsExtraN"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferDstScalarPerVector_BK1"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferSrcAccessOrder"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferSrcScalarPerVector"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferSrcVectorDim"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferThreadClusterArrangeOrder"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferThreadClusterLengths_BK0_N_BK1"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BElementwiseOperation"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BGridDesc_BK0_N_BK1"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BK1Value"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BThreadTransferSrcResetCoordinateAfterRun"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BlockSize"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CElementwiseOperation"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CGlobalMemoryDataOperation"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CGridDesc_M_N"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleBlockTransferScalarPerVector_NPerBlock"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleMXdlPerWavePerShuffle"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleNXdlPerWavePerShuffle"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatAB"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatC"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatCShuffle"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatGemmAcc"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::Gemm1KPerBlock"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::Gemm1NPerBlock"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::Gemm1NXdlPerWave"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::KPerBlock"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::LoopSched"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MPerBlock"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MPerXdl"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MXdlPerWave"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MaskOutUpperTriangle"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NPerBlock"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NPerXdl"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NXdlPerWave"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NumGemmKPrefetchStage"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::PadN"],[0,1,1,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE","ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::PipelineVer"],[0,0,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::BlockSliceLengths"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::DstData"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::DstDesc"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::DstDimAccessOrder"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::DstElementwiseOperation"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::DstInMemOp"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::DstScalarPerVector"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::DstScalarStrideInVector"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::DstVectorDim"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::NumThreadScratch"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::SrcData"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::SrcDesc"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::SrcDimAccessOrder"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::SrcElementwiseOperation"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::SrcScalarPerVector"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::SrcScalarStrideInVector"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::SrcVectorDim"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadClusterArrangeOrder"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadClusterLengths"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadGroup"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadTransferDstResetCoordinateAfterRun"],[0,1,1,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E","ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadTransferSrcResetCoordinateAfterRun"],[0,0,1,"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE","ck::ThreadwiseTensorSliceTransfer_StaticToStatic"],[0,1,1,"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE","ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DimAccessOrder"],[0,1,1,"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE","ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstData"],[0,1,1,"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE","ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstDesc"],[0,1,1,"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE","ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstScalarPerVector"],[0,1,1,"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE","ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstVectorDim"],[0,1,1,"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE","ck::ThreadwiseTensorSliceTransfer_StaticToStatic::ElementwiseOperation"],[0,1,1,"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE","ck::ThreadwiseTensorSliceTransfer_StaticToStatic::SliceLengths"],[0,1,1,"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE","ck::ThreadwiseTensorSliceTransfer_StaticToStatic::SrcData"],[0,1,1,"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE","ck::ThreadwiseTensorSliceTransfer_StaticToStatic::SrcDesc"]]},"objtypes":{"0":"cpp:class","1":"cpp:templateParam"},"objnames":{"0":["cpp","class","C++ class"],"1":["cpp","templateParam","C++ template parameter"]},"titleterms":{"api":0,"refer":0,"guid":[0,1,3,4,6],"introduct":[0,3],"us":0,"ck":[0,5,7],"datatyp":0,"devicemem":0,"kernel":[0,5,6],"For":0,"flashattent":0,"contributor":1,"":[1,2],"pull":1,"request":1,"guidelin":1,"disclaim":2,"amd":2,"standard":2,"legal":2,"third":2,"parti":2,"get":3,"start":[3,5],"document":3,"roadmap":3,"support":4,"primit":4,"softmax":4,"docker":5,"hub":5,"why":5,"do":5,"i":5,"need":5,"thi":5,"so":5,"what":5,"compos":[5,6],"And":5,"insid":5,"which":5,"imag":5,"right":5,"me":5,"dii":5,"here":5,"licens":5,"user":6,"content":6,"hello":7,"world":7,"motiv":7,"descript":7,"hardwar":7,"target":7,"build":7,"librari":7,"run":7,"exampl":7,"test":7,"summari":7},"envversion":{"sphinx.domains.c":2,"sphinx.domains.changeset":1,"sphinx.domains.citation":1,"sphinx.domains.cpp":8,"sphinx.domains.index":1,"sphinx.domains.javascript":2,"sphinx.domains.math":2,"sphinx.domains.python":3,"sphinx.domains.rst":2,"sphinx.domains.std":2,"sphinxcontrib.bibtex":9,"sphinx":57},"alltitles":{"API Reference Guide":[[0,"api-reference-guide"]],"Introduction":[[0,"introduction"],[3,"introduction"]],"Using CK API":[[0,"using-ck-api"]],"CK Datatypes":[[0,"ck-datatypes"]],"DeviceMem":[[0,"devicemem"]],"Kernels For Flashattention":[[0,"kernels-for-flashattention"]],"Contributor\u2019s Guide":[[1,"contributor-s-guide"]],"Pull-request guidelines":[[1,"pull-request-guidelines"]],"Disclaimer":[[2,"disclaimer"]],"AMD\u2019s standard legal Disclaimer":[[2,"amd-s-standard-legal-disclaimer"]],"Third Party Disclaimer":[[2,"third-party-disclaimer"]],"Getting Started Guide":[[3,"getting-started-guide"]],"Documentation Roadmap":[[3,"documentation-roadmap"]],"Supported Primitives Guide":[[4,"supported-primitives-guide"]],"Softmax":[[4,"softmax"]],"CK docker hub":[[5,"ck-docker-hub"]],"Why do I need this?":[[5,"why-do-i-need-this"]],"So what is Composable Kernel?":[[5,"so-what-is-composable-kernel"]],"And what is inside?":[[5,"and-what-is-inside"]],"Which image is right for me?":[[5,"which-image-is-right-for-me"]],"DIY starts here":[[5,"diy-starts-here"]],"License":[[5,"license"]],"Composable Kernel User Guide":[[6,"composable-kernel-user-guide"]],"Contents:":[[6,null]],"CK Hello world":[[7,"ck-hello-world"]],"Motivation":[[7,"motivation"]],"Description":[[7,"description"]],"Hardware targets":[[7,"hardware-targets"]],"Build the library":[[7,"build-the-library"]],"Run examples and tests":[[7,"run-examples-and-tests"]],"Summary":[[7,"summary"]]},"indexentries":{"devicemem (c++ struct)":[[0,"_CPPv49DeviceMem"]],"ck::blockwisegemmxdlops_v2 (c++ struct)":[[0,"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E"]],"ck::blockwisesoftmax (c++ struct)":[[0,"_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE"]],"ck::gridwisebatchedgemmsoftmaxgemm_xdl_cshuffle (c++ struct)":[[0,"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE"]],"ck::threadgrouptensorslicetransfer_v4r1 (c++ struct)":[[0,"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E"]],"ck::threadwisetensorslicetransfer_statictostatic (c++ struct)":[[0,"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE"]]}})