Search.setIndex({"docnames": ["API_Reference_Guide", "Contributors_Guide", "Disclaimer", "Linux_Install_Guide", "Supported_Primitives_Guide", "dockerhub", "index", "tutorial_hello_world"], "filenames": ["API_Reference_Guide.rst", "Contributors_Guide.rst", "Disclaimer.rst", "Linux_Install_Guide.rst", "Supported_Primitives_Guide.rst", "dockerhub.rst", "index.rst", "tutorial_hello_world.rst"], "titles": ["5. API Reference Guide", "6. Contributor\u2019s Guide", "7. Disclaimer", "1. Getting Started Guide", "4. Supported Primitives Guide", "3. CK docker hub", "Composable Kernel User Guide", "2. CK Hello world"], "terms": {"thi": [0, 2, 3, 4, 6, 7], "document": [0, 2, 4, 6], "contain": [0, 2, 3, 4, 5, 7], "detail": [0, 4, 5, 7], "compos": [0, 3, 4, 7], "librari": [0, 5, 6], "introduc": 0, "some": [0, 7], "kei": 0, "design": 0, "principl": 0, "ar": [0, 2, 5, 7], "write": [0, 5], "new": [0, 2, 4, 7], "class": 0, "extend": 0, "function": [0, 7], "section": [0, 7], "describ": 0, "how": [0, 7], "struct": 0, "store": 0, "data": [0, 7], "gpu": [0, 5, 7], "devic": [0, 2, 7], "memori": 0, "The": [0, 2, 3, 5, 7], "algorithm": [0, 4], "i": [0, 2, 3, 4, 6, 7], "defin": 0, "dao": 0, "et": 0, "al": 0, "dfe": 0, "22": 0, "list": [0, 3, 7], "implement": [0, 4], "gridwis": 0, "templat": 0, "typenam": 0, "floatab": 0, "floatgemmacc": 0, "floatcshuffl": 0, "floatc": 0, "aelementwiseoper": 0, "belementwiseoper": 0, "accelementwiseoper": 0, "b1elementwiseoper": 0, "celementwiseoper": 0, "inmemorydataoperationenum": 0, "cglobalmemorydataoper": 0, "agriddesc_ak0_m_ak1": 0, "bgriddesc_bk0_n_bk1": 0, "b1griddesc_bk0_n_bk1": 0, "cgriddesc_m_n": 0, "index_t": 0, "numgemmkprefetchstag": 0, "blocksiz": 0, "mperblock": 0, "nperblock": 0, "kperblock": 0, "gemm1nperblock": 0, "gemm1kperblock": 0, "ak1valu": 0, "bk1valu": 0, "b1k1valu": 0, "mperxdl": 0, "nperxdl": 0, "mxdlperwav": 0, "nxdlperwav": 0, "gemm1nxdlperwav": 0, "ablocktransferthreadclusterlengths_ak0_m_ak1": 0, "ablocktransferthreadclusterarrangeord": 0, "ablocktransfersrcaccessord": 0, "ablocktransfersrcvectordim": 0, "ablocktransfersrcscalarpervector": 0, "ablocktransferdstscalarpervector_ak1": 0, "bool": 0, "athreadtransfersrcresetcoordinateafterrun": 0, "ablockldsextram": 0, "bblocktransferthreadclusterlengths_bk0_n_bk1": 0, "bblocktransferthreadclusterarrangeord": 0, "bblocktransfersrcaccessord": 0, "bblocktransfersrcvectordim": 0, "bblocktransfersrcscalarpervector": 0, "bblocktransferdstscalarpervector_bk1": 0, "bthreadtransfersrcresetcoordinateafterrun": 0, "bblockldsextran": 0, "b1blocktransferthreadclusterlengths_bk0_n_bk1": 0, "b1blocktransferthreadclusterarrangeord": 0, "b1blocktransfersrcaccessord": 0, "b1blocktransfersrcvectordim": 0, "b1blocktransfersrcscalarpervector": 0, "b1blocktransferdstscalarpervector_bk1": 0, "b1threadtransfersrcresetcoordinateafterrun": 0, "b1blockldsextran": 0, "cshufflemxdlperwavepershuffl": 0, "cshufflenxdlperwavepershuffl": 0, "cshuffleblocktransferclusterlengths_mblock_mperblock_nblock_nperblock": 0, "cshuffleblocktransferscalarpervector_nperblock": 0, "loopschedul": [0, 7], "loopsch": 0, "padn": 0, "maskoutuppertriangl": 0, "pipelinevers": [0, 7], "pipelinev": 0, "v1": [0, 7], "gridwisebatchedgemmsoftmaxgemm_xdl_cshuffl": 0, "gemm": 0, "softmax": [0, 6], "fusion": 0, "blockwis": 0, "threadgroup": 0, "srcelementwiseoper": 0, "dstelementwiseoper": 0, "dstinmemop": 0, "blockslicelength": 0, "threadclusterlength": 0, "threadclusterarrangeord": 0, "srcdata": 0, "dstdata": 0, "srcdesc": 0, "dstdesc": 0, "srcdimaccessord": 0, "dstdimaccessord": 0, "srcvectordim": 0, "dstvectordim": 0, "srcscalarpervector": 0, "dstscalarpervector": 0, "srcscalarstrideinvector": 0, "dstscalarstrideinvector": 0, "threadtransfersrcresetcoordinateafterrun": 0, "threadtransferdstresetcoordinateafterrun": 0, "numthreadscratch": 0, "1": [0, 4, 7], "threadgrouptensorslicetransfer_v4r1": 0, "transfer": 0, "version": [0, 2, 5, 7], "doe": [0, 7], "follow": [0, 3, 4], "thing": 0, "avoid": 0, "scratch": 0, "issu": 0, "staticallyindexedarrai": 0, "instead": 0, "c": [0, 2, 5], "arrai": 0, "thread": 0, "buffer": 0, "threadwisetensorslicetransfer_v3": 0, "keep": 0, "tensor": [0, 7], "descriptor": 0, "run": [0, 4, 5, 6], "construct": 0, "coordin": [0, 7], "floatacc": 0, "atiledesc": 0, "btiledesc": 0, "ammatiledesc": 0, "bmmatiledesc": 0, "mrepeat": 0, "nrepeat": 0, "kpack": 0, "transposec": 0, "fals": 0, "ammakstrid": 0, "xdlopsgemm": 0, "k0perxdlop": 0, "bmmakstrid": 0, "blockwisegemmxdlops_v2": 0, "support": [0, 6, 7], "regular": 0, "xdl": 0, "output": [0, 4, 7], "m2_m3_m4_m2": 0, "transpos": 0, "m2_n2_n3_n4": 0, "decoupl": 0, "input": 0, "tile": [0, 4, 7], "mma": 0, "order": [0, 3], "both": 0, "vgpr": 0, "ld": 0, "sourc": 0, "configur": [0, 7], "k": [0, 4], "index": 0, "start": [0, 6, 7], "posit": 0, "step": 0, "size": [0, 4], "after": 0, "each": 0, "fma": 0, "instruct": [0, 3], "accdatatyp": 0, "threadmap_m_k": 0, "threadclusterdesc_m_k": 0, "threadslicedesc_m_k": 0, "ignorenan": 0, "blockwisesoftmax": 0, "paramet": [0, 7], "block": [0, 4], "accumul": 0, "type": [0, 7], "id": [0, 5], "m_k": 0, "threadwis": 0, "cluster": 0, "slice": 0, "flag": [0, 7], "ignor": 0, "nan": 0, "default": [0, 7], "elementwiseoper": 0, "slicelength": 0, "dimaccessord": 0, "enable_if": 0, "isknownatcompiletim": 0, "threadwisetensorslicetransfer_statictostat": 0, "do": [0, 6, 7], "NOT": 0, "involv": 0, "ani": [0, 2], "staticbuff": 0, "tri": 0, "daniel": 0, "y": [0, 4], "fu": 0, "stefano": 0, "ermon": 0, "atri": 0, "rudra": 0, "christoph": 0, "r": [0, 2, 7], "\u00e9": 0, "fast": [0, 7], "effici": [0, 7], "exact": 0, "attent": [0, 7], "io": 0, "awar": 0, "arxiv": 0, "preprint": 0, "2205": 0, "14135": 0, "2022": 0, "todo": [1, 3], "inform": 2, "present": 2, "purpos": [2, 5], "onli": [2, 7], "mai": 2, "technic": [2, 7], "inaccuraci": 2, "omiss": 2, "typograph": 2, "error": 2, "herein": 2, "subject": 2, "chang": [2, 7], "render": 2, "inaccur": 2, "mani": [2, 7], "reason": 2, "includ": [2, 5, 7], "limit": 2, "product": 2, "roadmap": [2, 6], "compon": [2, 7], "motherboard": 2, "model": [2, 5, 7], "releas": [2, 5, 7], "differ": [2, 7], "between": 2, "manufactur": 2, "softwar": 2, "bio": 2, "flash": 2, "firmwar": 2, "upgrad": 2, "like": [2, 5, 7], "comput": [2, 4, 7], "system": [2, 7], "ha": [2, 7], "risk": 2, "secur": 2, "vulner": 2, "cannot": 2, "complet": 2, "prevent": 2, "mitig": 2, "assum": 2, "oblig": 2, "updat": [2, 4], "otherwis": 2, "correct": 2, "revis": 2, "howev": 2, "reserv": 2, "right": [2, 6, 7], "make": [2, 5, 7], "from": [2, 4, 7], "time": [2, 4, 7], "content": 2, "hereof": 2, "without": 2, "notifi": 2, "person": 2, "provid": [2, 5], "AS": 2, "NO": 2, "represent": 2, "OR": 2, "warranti": 2, "WITH": 2, "respect": 2, "TO": 2, "THE": 2, "AND": 2, "respons": 2, "FOR": 2, "THAT": 2, "appear": 2, "IN": 2, "specif": [2, 5], "impli": 2, "OF": 2, "non": 2, "infring": 2, "merchant": 2, "fit": [2, 7], "particular": 2, "event": 2, "WILL": 2, "BE": 2, "liabl": 2, "relianc": 2, "direct": 2, "indirect": 2, "special": 2, "other": [2, 7], "consequenti": 2, "damag": 2, "aris": 2, "us": [2, 3, 5, 6, 7], "even": [2, 5], "IF": 2, "expressli": 2, "advis": 2, "possibl": [2, 7], "SUCH": 2, "arrow": 2, "logo": 2, "radeon": [2, 7], "ryzen": 2, "epyc": 2, "combin": 2, "thereof": 2, "trademark": 2, "advanc": 2, "micro": 2, "inc": 2, "name": [2, 5], "public": 2, "identif": 2, "compani": 2, "googl": 2, "regist": 2, "llc": 2, "pcie": 2, "pci": 2, "sig": 2, "corpor": 2, "linux": 2, "linu": 2, "torvald": 2, "u": 2, "countri": 2, "ubuntu": [2, 5, 7], "canon": 2, "ltd": 2, "2023": 2, "all": [2, 5, 7], "licens": [2, 6], "you": [2, 5, 7], "directli": 2, "own": 2, "link": 2, "A": 2, "kind": 2, "done": [2, 7], "AT": 2, "your": [2, 5, 7], "sole": 2, "discret": 2, "under": [2, 5], "circumst": 2, "instal": 3, "contribut": 3, "kernel": [3, 4, 7], "ck": [3, 4, 6], "suggest": 3, "read": [3, 7], "In": [4, 7], "contrast": 4, "api": [4, 6], "refer": [4, 6], "an": [4, 7], "introduct": [4, 6, 7], "math": 4, "which": [4, 6, 7], "underpin": 4, "For": [4, 5, 6, 7], "vector": 4, "x": 4, "2": [4, 7], "ldot": 4, "t": [4, 5, 7], "b": 4, "we": [4, 5, 7], "can": [4, 5, 7], "decompos": 4, "concaten": 4, "begin": 4, "align": 4, "m": [4, 7], "max": 4, "f": 4, "exp": 4, "z": 4, "operatornam": 4, "end": [4, 7], "where": 4, "j": [4, 5, 7], "x_1": 4, "x_b": 4, "scalar": 4, "matrix": [4, 7], "t_r": 4, "t_c": 4, "x_": 4, "ij": 4, "b_r": 4, "b_c": 4, "row": 4, "wise": 4, "calcul": 4, "tild": 4, "_": 4, "rowmax": 4, "p": [4, 7], "rowsum": 4, "p_": 4, "If": [4, 5, 7], "initi": [4, 7], "sum": 4, "first": [4, 7], "column": 4, "m_i": 4, "i1": 4, "z_i": 4, "diag": 4, "els": 4, "_i": 4, "y_": 4, "ik": 4, "z_": 4, "reset": 4, "variabl": 4, "To": [5, 7], "our": [5, 7], "live": [5, 7], "easier": [5, 7], "bring": 5, "depend": [5, 7], "togeth": 5, "recommend": 5, "aim": [5, 7], "program": 5, "perform": [5, 7], "critic": 5, "machin": [5, 7], "learn": [5, 7], "workload": 5, "across": 5, "multipl": [5, 7], "architectur": [5, 7], "cpu": [5, 7], "etc": 5, "through": 5, "gener": [5, 7], "languag": 5, "hip": 5, "get": [5, 6, 7], "git": [5, 7], "clone": [5, 7], "http": [5, 7], "github": [5, 7], "com": [5, 7], "rocmsoftwareplatform": [5, 7], "composable_kernel": [5, 7], "privileg": [5, 7], "group": [5, 7], "add": [5, 7], "sudo": [5, 7], "w": [5, 7], "root": [5, 7], "workspac": [5, 7], "v": [5, 7], "path_to_local_workspac": 5, "rocm": [5, 7], "ck_ub20": [5, 7], "04_rocm5": [5, 7], "3_releas": [5, 7], "bin": [5, 7], "bash": [5, 7], "build": [5, 6], "mkdir": [5, 7], "cd": [5, 7], "specifi": 5, "target": [5, 6], "exampl": [5, 6], "below": 5, "gfx908": [5, 7], "gfx90a": [5, 7], "cmake": [5, 7], "d": [5, 7], "cmake_prefix_path": [5, 7], "opt": [5, 7], "cmake_cxx_compil": [5, 7], "hipcc": [5, 7], "cmake_cxx_flag": [5, 7], "o3": [5, 7], "cmake_build_typ": [5, 7], "gpu_target": [5, 7], "test": [5, 6], "case": [5, 7], "also": [5, 7], "example_gemm_xdl_fp16": [5, 7], "test_gemm_fp16": [5, 7], "more": [5, 7], "visit": [5, 7], "repo": [5, 7], "have": [5, 7], "everyth": [5, 7], "compil": [5, 7], "let": [5, 7], "": [5, 6, 7], "take": 5, "look": [5, 7], "4_releas": 5, "spec": 5, "made": 5, "ub20": 5, "04": [5, 7], "base": [5, 7], "20": [5, 7], "rocm5": 5, "4": [5, 7], "platform": 5, "5": 5, "just": 5, "pick": [5, 7], "project": [5, 7], "re": 5, "set": 5, "custom": 5, "stop": 5, "tinker": 5, "feel": 5, "free": 5, "adjust": [5, 7], "dockerfil": 5, "mit": 5, "hello": 6, "world": 6, "motiv": 6, "descript": 6, "hardwar": 6, "summari": 6, "docker": [6, 7], "hub": 6, "why": 6, "need": [6, 7], "so": [6, 7], "what": 6, "And": 6, "insid": [6, 7], "imag": [6, 7], "me": 6, "dii": 6, "here": [6, 7], "primit": 6, "datatyp": 6, "devicemem": 6, "flashattent": 6, "contributor": 6, "pull": 6, "request": 6, "guidelin": 6, "disclaim": 6, "amd": [6, 7], "standard": 6, "legal": 6, "third": 6, "parti": 6, "tutori": 7, "engin": 7, "deal": 7, "artifici": 7, "intellig": 7, "who": 7, "would": 7, "optim": 7, "pipelin": 7, "squeez": 7, "everi": 7, "drop": 7, "ad": 7, "approach": 7, "latest": 7, "doesn": 7, "bleed": 7, "edg": 7, "featur": 7, "reproduc": 7, "now": 7, "forev": 7, "dure": 7, "sai": 7, "futur": 7, "go": 7, "depth": 7, "breadth": 7, "familiar": 7, "tool": 7, "wai": 7, "integr": 7, "modern": 7, "ai": 7, "technologi": 7, "solv": 7, "problem": 7, "imagin": 7, "field": 7, "craft": 7, "workflow": 7, "still": 7, "challeng": 7, "one": 7, "heavi": 7, "lift": 7, "collect": 7, "oper": 7, "creat": 7, "ones": 7, "requir": 7, "major": 7, "neural": 7, "network": 7, "convolut": 7, "contract": 7, "reduct": 7, "modul": 7, "varieti": 7, "activ": 7, "fuse": 7, "almost": 7, "reach": 7, "speed": 7, "light": 7, "acceler": 7, "abil": 7, "layer": 7, "structur": 7, "transform": 7, "low": 7, "precis": 7, "fp16": 7, "bf16": 7, "int8": 7, "int4": 7, "excit": 7, "benchmark": 7, "result": 7, "awesom": 7, "blog": 7, "post": 7, "fulli": 7, "gfx1030": 7, "check": 7, "hand": 7, "decid": 7, "instinct": 7, "mi100": 7, "mi210": 7, "mi250": 7, "mi250x": 7, "pro": 7, "v620": 7, "w6800": 7, "w6800x": 7, "duo": 7, "w6900x": 7, "rx": 7, "6800": 7, "xt": 7, "6900": 7, "xtx": 7, "6950": 7, "There": 7, "cloud": 7, "option": 7, "find": 7, "don": 7, "rebas": 7, "checkout": 7, "tutorial_hello_world": 7, "prepar": 7, "necessari": 7, "v5": 7, "3": 7, "current": 7, "folder": 7, "home": 7, "line": 7, "path": 7, "navig": 7, "directori": 7, "previou": 7, "talk": 7, "about": 7, "onc": 7, "build_dev": 7, "off": 7, "went": 7, "well": 7, "up": 7, "file": 7, "been": 7, "written": 7, "final": 7, "smooth": 7, "ll": 7, "see": 7, "scan": 7, "100": 7, "built": 7, "ctest": 7, "n": 7, "them": 7, "separ": 7, "execut": 7, "argument": 7, "mean": 7, "want": 7, "mode": 7, "verifi": 7, "matric": 7, "integ": 7, "plai": 7, "around": 7, "goe": 7, "should": 7, "someth": 7, "a_m_k": 7, "dim": 7, "length": 7, "3840": 7, "4096": 7, "stride": 7, "b_k_n": 7, "c_m_n": 7, "launch_and_time_kernel": 7, "grid_dim": 7, "480": 7, "block_dim": 7, "256": 7, "warm": 7, "10": 7, "perf": 7, "10017": 7, "117": 7, "tflop": 7, "87": 7, "6854": 7, "gb": 7, "devicegemmxdl": 7, "128": 7, "8": 7, "32": 7, "numprefetch": 7, "meanwhil": 7, "But": 7, "panic": 7, "example_gemm_dl_fp16": 7, "nice": 7, "similar": 7, "arg": 7, "a_grid_desc_k0_m0_m1_k1_": 7, "2048": 7, "b_grid_desc_k0_n0_n1_k1_": 7, "c_grid_desc_m_n_": 7, "960": 7, "65695": 7, "35": 7, "234": 7, "26": 7, "3797": 7, "devicegemmdl": 7, "16": 7, "Or": 7, "121": 7, "pass": 7, "51": 7, "81": 7, "sec": 7, "0": 7, "fail": 7, "out": 7, "took": 7, "ran": 7, "stai": 7, "tune": 7, "next": 7, "config": 7, "best": 7, "task": 7, "forget": 7, "switch": 7, "instanc": 7, "launch": 7, "better": 7, "spend": 7, "monei": 7, "sure": 7}, "objects": {"": [[0, 0, 1, "_CPPv49DeviceMem", "DeviceMem"], [0, 0, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::AMmaKStride"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::AMmaTileDesc"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::ATileDesc"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::BMmaKStride"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::BMmaTileDesc"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::BTileDesc"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::BlockSize"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::FloatAB"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::FloatAcc"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::KPack"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::KPerBlock"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::MPerBlock"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::MPerXDL"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::MRepeat"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::NPerBlock"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::NPerXDL"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::NRepeat"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::TransposeC"], [0, 0, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::AccDataType"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::BlockSize"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::IgnoreNaN"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::ThreadClusterDesc_M_K"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::ThreadMap_M_K"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::ThreadSliceDesc_M_K"], [0, 0, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockLdsExtraM"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferDstScalarPerVector_AK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferSrcAccessOrder"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferSrcScalarPerVector"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferSrcVectorDim"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferThreadClusterArrangeOrder"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferThreadClusterLengths_AK0_M_AK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AElementwiseOperation"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AGridDesc_AK0_M_AK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AK1Value"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AThreadTransferSrcResetCoordinateAfterRun"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AccElementwiseOperation"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockLdsExtraN"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferDstScalarPerVector_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferSrcAccessOrder"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferSrcScalarPerVector"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferSrcVectorDim"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferThreadClusterArrangeOrder"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferThreadClusterLengths_BK0_N_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1ElementwiseOperation"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1GridDesc_BK0_N_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1K1Value"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1ThreadTransferSrcResetCoordinateAfterRun"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockLdsExtraN"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferDstScalarPerVector_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferSrcAccessOrder"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferSrcScalarPerVector"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferSrcVectorDim"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferThreadClusterArrangeOrder"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferThreadClusterLengths_BK0_N_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BElementwiseOperation"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BGridDesc_BK0_N_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BK1Value"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BThreadTransferSrcResetCoordinateAfterRun"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BlockSize"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CElementwiseOperation"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CGlobalMemoryDataOperation"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CGridDesc_M_N"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleBlockTransferScalarPerVector_NPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleMXdlPerWavePerShuffle"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleNXdlPerWavePerShuffle"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatAB"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatC"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatCShuffle"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatGemmAcc"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::Gemm1KPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::Gemm1NPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::Gemm1NXdlPerWave"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::KPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::LoopSched"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MPerXdl"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MXdlPerWave"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MaskOutUpperTriangle"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NPerXdl"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NXdlPerWave"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NumGemmKPrefetchStage"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::PadN"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::PipelineVer"], [0, 0, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::BlockSliceLengths"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstData"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstDesc"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstDimAccessOrder"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstElementwiseOperation"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstInMemOp"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstScalarPerVector"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstScalarStrideInVector"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstVectorDim"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::NumThreadScratch"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcData"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcDesc"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcDimAccessOrder"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcElementwiseOperation"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcScalarPerVector"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcScalarStrideInVector"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcVectorDim"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadClusterArrangeOrder"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadClusterLengths"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadGroup"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadTransferDstResetCoordinateAfterRun"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadTransferSrcResetCoordinateAfterRun"], [0, 0, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DimAccessOrder"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstData"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstDesc"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstScalarPerVector"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstVectorDim"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::ElementwiseOperation"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::SliceLengths"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::SrcData"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::SrcDesc"]]}, "objtypes": {"0": "cpp:class", "1": "cpp:templateParam"}, "objnames": {"0": ["cpp", "class", "C++ class"], "1": ["cpp", "templateParam", "C++ template parameter"]}, "titleterms": {"api": 0, "refer": 0, "guid": [0, 1, 3, 4, 6], "introduct": [0, 3], "us": 0, "ck": [0, 5, 7], "datatyp": 0, "devicemem": 0, "kernel": [0, 5, 6], "For": 0, "flashattent": 0, "contributor": 1, "": [1, 2], "pull": 1, "request": 1, "guidelin": 1, "disclaim": 2, "amd": 2, "standard": 2, "legal": 2, "third": 2, "parti": 2, "get": 3, "start": [3, 5], "document": 3, "roadmap": 3, "support": 4, "primit": 4, "softmax": 4, "docker": 5, "hub": 5, "why": 5, "do": 5, "i": 5, "need": 5, "thi": 5, "so": 5, "what": 5, "compos": [5, 6], "And": 5, "insid": 5, "which": 5, "imag": 5, "right": 5, "me": 5, "dii": 5, "here": 5, "licens": 5, "user": 6, "content": 6, "hello": 7, "world": 7, "motiv": 7, "descript": 7, "hardwar": 7, "target": 7, "build": 7, "librari": 7, "run": 7, "exampl": 7, "test": 7, "summari": 7}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinxcontrib.bibtex": 9, "sphinx": 57}, "alltitles": {"API Reference Guide": [[0, "api-reference-guide"]], "Introduction": [[0, "introduction"], [3, "introduction"]], "Using CK API": [[0, "using-ck-api"]], "CK Datatypes": [[0, "ck-datatypes"]], "DeviceMem": [[0, "devicemem"]], "Kernels For Flashattention": [[0, "kernels-for-flashattention"]], "Contributor\u2019s Guide": [[1, "contributor-s-guide"]], "Pull-request guidelines": [[1, "pull-request-guidelines"]], "Disclaimer": [[2, "disclaimer"]], "AMD\u2019s standard legal Disclaimer": [[2, "amd-s-standard-legal-disclaimer"]], "Third Party Disclaimer": [[2, "third-party-disclaimer"]], "Getting Started Guide": [[3, "getting-started-guide"]], "Documentation Roadmap": [[3, "documentation-roadmap"]], "Supported Primitives Guide": [[4, "supported-primitives-guide"]], "Softmax": [[4, "softmax"]], "CK docker hub": [[5, "ck-docker-hub"]], "Why do I need this?": [[5, "why-do-i-need-this"]], "So what is Composable Kernel?": [[5, "so-what-is-composable-kernel"]], "And what is inside?": [[5, "and-what-is-inside"]], "Which image is right for me?": [[5, "which-image-is-right-for-me"]], "DIY starts here": [[5, "diy-starts-here"]], "License": [[5, "license"]], "Composable Kernel User Guide": [[6, "composable-kernel-user-guide"]], "Contents:": [[6, null]], "CK Hello world": [[7, "ck-hello-world"]], "Motivation": [[7, "motivation"]], "Description": [[7, "description"]], "Hardware targets": [[7, "hardware-targets"]], "Build the library": [[7, "build-the-library"]], "Run examples and tests": [[7, "run-examples-and-tests"]], "Summary": [[7, "summary"]]}, "indexentries": {"devicemem (c++ struct)": [[0, "_CPPv49DeviceMem"]], "ck::blockwisegemmxdlops_v2 (c++ struct)": [[0, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E"]], "ck::blockwisesoftmax (c++ struct)": [[0, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE"]], "ck::gridwisebatchedgemmsoftmaxgemm_xdl_cshuffle (c++ struct)": [[0, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE"]], "ck::threadgrouptensorslicetransfer_v4r1 (c++ struct)": [[0, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E"]], "ck::threadwisetensorslicetransfer_statictostatic (c++ struct)": [[0, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE"]]}})