Commit f53f6dad authored by geyyer's avatar geyyer
Browse files

Deploying to gh-pages from @ f964a5e68ab26b77a1a9b80f5be1ef1e97b5e23b 🚀

parent 26ed5d8d
/**
* @preserve HTML5 Shiv 3.7.3 | @afarkas @jdalton @jon_neal @rem | MIT/GPL2 Licensed
*/
// Minified, vendored build artifact - kept byte-for-byte; edit the upstream source instead.
//
// What the IIFE below does, as far as the visible code shows:
//   * runs a feature test: `k` records whether an <a> element has a `hidden`
//     property, `l` whether an unknown element (<xyz>) parses into a child node
//     (both are forced to true if the test throws);
//   * builds the public object `t` (elements list, version, shivCSS,
//     supportsUnknownElements, shivMethods, type, shivDocument, createElement,
//     createDocumentFragment, addElements) and stores it as `html5` on the
//     first IIFE argument (window when defined, otherwise `this`);
//   * calls j(document) (exposed as `shivDocument`), which injects the default
//     display rules once per document when shivCSS is enabled and, when `l` is
//     falsy, replaces document.createElement / createDocumentFragment;
//   * assigns `t` to module.exports when a CommonJS `module` object exists.
// Options are read from a pre-existing `html5` object on that same argument (`n`).
// NOTE(review): the replacement createDocumentFragment is generated with the
// Function constructor (dynamic code), which a strict CSP without
// 'unsafe-eval' would presumably block - confirm if CSP matters here.
// NOTE(review): the banner says 3.7.3 but the embedded version string is "3.7.3-pre".
!function(a,b){function c(a,b){var c=a.createElement("p"),d=a.getElementsByTagName("head")[0]||a.documentElement;return c.innerHTML="x<style>"+b+"</style>",d.insertBefore(c.lastChild,d.firstChild)}function d(){var a=t.elements;return"string"==typeof a?a.split(" "):a}function e(a,b){var c=t.elements;"string"!=typeof c&&(c=c.join(" ")),"string"!=typeof a&&(a=a.join(" ")),t.elements=c+" "+a,j(b)}function f(a){var b=s[a[q]];return b||(b={},r++,a[q]=r,s[r]=b),b}function g(a,c,d){if(c||(c=b),l)return c.createElement(a);d||(d=f(c));var e;return e=d.cache[a]?d.cache[a].cloneNode():p.test(a)?(d.cache[a]=d.createElem(a)).cloneNode():d.createElem(a),!e.canHaveChildren||o.test(a)||e.tagUrn?e:d.frag.appendChild(e)}function h(a,c){if(a||(a=b),l)return a.createDocumentFragment();c=c||f(a);for(var e=c.frag.cloneNode(),g=0,h=d(),i=h.length;i>g;g++)e.createElement(h[g]);return e}function i(a,b){b.cache||(b.cache={},b.createElem=a.createElement,b.createFrag=a.createDocumentFragment,b.frag=b.createFrag()),a.createElement=function(c){return t.shivMethods?g(c,a,b):b.createElem(c)},a.createDocumentFragment=Function("h,f","return function(){var n=f.cloneNode(),c=n.createElement;h.shivMethods&&("+d().join().replace(/[\w\-:]+/g,function(a){return b.createElem(a),b.frag.createElement(a),'c("'+a+'")'})+");return n}")(t,b.frag)}function j(a){a||(a=b);var d=f(a);return!t.shivCSS||k||d.hasCSS||(d.hasCSS=!!c(a,"article,aside,dialog,figcaption,figure,footer,header,hgroup,main,nav,section{display:block}mark{background:#FF0;color:#000}template{display:none}")),l||i(a,d),a}var k,l,m="3.7.3-pre",n=a.html5||{},o=/^<|^(?:button|map|select|textarea|object|iframe|option|optgroup)$/i,p=/^(?:a|b|code|div|fieldset|h1|h2|h3|h4|h5|h6|i|label|li|ol|p|q|span|strong|style|table|tbody|td|th|tr|ul)$/i,q="_html5shiv",r=0,s={};!function(){try{var a=b.createElement("a");a.innerHTML="<xyz></xyz>",k="hidden"in a,l=1==a.childNodes.length||function(){b.createElement("a");var 
a=b.createDocumentFragment();return"undefined"==typeof a.cloneNode||"undefined"==typeof a.createDocumentFragment||"undefined"==typeof a.createElement}()}catch(c){k=!0,l=!0}}();var t={elements:n.elements||"abbr article aside audio bdi canvas data datalist details dialog figcaption figure footer header hgroup main mark meter nav output picture progress section summary template time video",version:m,shivCSS:n.shivCSS!==!1,supportsUnknownElements:l,shivMethods:n.shivMethods!==!1,type:"default",shivDocument:j,createElement:g,createDocumentFragment:h,addElements:e};a.html5=t,j(b),"object"==typeof module&&module.exports&&(module.exports=t)}("undefined"!=typeof window?window:this,document);
\ No newline at end of file
// Minified bundle (the t.m / t.c / t.d / t.r / t.n bootstrap looks like a webpack
// runtime) - kept byte-for-byte; edit the unminified source and rebuild instead.
//
// Module 0 loads module 1 and exports module 3 (an empty module).
// Module 1 defines `ThemeNav` (sidebar navigation behaviour):
//   * enable()/enableSticky(): run init() and reset() through jQuery's ready
//     callback and register hashchange, scroll and resize handlers (scroll and
//     resize work is deferred through requestAnimationFrame);
//   * init(): wires the click handlers for the `data-toggle` elements, wraps
//     `table.docutils` tables in responsive containers and prepends a
//     "toctree-expand" button to menu links that have a sibling <ul>;
//   * reset(): marks the menu entry matching location.hash as current and
//     scrolls it into view; errors are logged, not rethrown;
//   * onScroll()/onResize()/hashChange()/toggleCurrent(): scroll syncing,
//     cached dimensions and expand/collapse state.
// It then publishes window.SphinxRtdTheme = { Navigation, StickyNav } (both the
// same object) and installs requestAnimationFrame / cancelAnimationFrame
// fallbacks (vendor-prefixed versions, else a setTimeout-based shim).
// Module 2 re-exports the global `jQuery`; it is only required when `window` is undefined.
//
// NOTE(review): in init(), `expand=n('<button ...')` assigns to an undeclared
// identifier, i.e. an implicit global in sloppy mode (ReferenceError if this
// code ever runs in strict mode).
// NOTE(review): in hashChange(), the callback is a plain function, so
// `this.linkScroll=!1` presumably does not reset ThemeNav.linkScroll - verify.
// NOTE(review): reset() and onResize() use the global `$` rather than the jQuery
// reference captured by this module - confirm `$` is always jQuery on the page.
!function(n){var e={};function t(i){if(e[i])return e[i].exports;var o=e[i]={i:i,l:!1,exports:{}};return n[i].call(o.exports,o,o.exports,t),o.l=!0,o.exports}t.m=n,t.c=e,t.d=function(n,e,i){t.o(n,e)||Object.defineProperty(n,e,{enumerable:!0,get:i})},t.r=function(n){"undefined"!=typeof Symbol&&Symbol.toStringTag&&Object.defineProperty(n,Symbol.toStringTag,{value:"Module"}),Object.defineProperty(n,"__esModule",{value:!0})},t.t=function(n,e){if(1&e&&(n=t(n)),8&e)return n;if(4&e&&"object"==typeof n&&n&&n.__esModule)return n;var i=Object.create(null);if(t.r(i),Object.defineProperty(i,"default",{enumerable:!0,value:n}),2&e&&"string"!=typeof n)for(var o in n)t.d(i,o,function(e){return n[e]}.bind(null,o));return i},t.n=function(n){var e=n&&n.__esModule?function(){return n.default}:function(){return n};return t.d(e,"a",e),e},t.o=function(n,e){return Object.prototype.hasOwnProperty.call(n,e)},t.p="",t(t.s=0)}([function(n,e,t){t(1),n.exports=t(3)},function(n,e,t){(function(){var e="undefined"!=typeof window?window.jQuery:t(2);n.exports.ThemeNav={navBar:null,win:null,winScroll:!1,winResize:!1,linkScroll:!1,winPosition:0,winHeight:null,docHeight:null,isRunning:!1,enable:function(n){var t=this;void 0===n&&(n=!0),t.isRunning||(t.isRunning=!0,e((function(e){t.init(e),t.reset(),t.win.on("hashchange",t.reset),n&&t.win.on("scroll",(function(){t.linkScroll||t.winScroll||(t.winScroll=!0,requestAnimationFrame((function(){t.onScroll()})))})),t.win.on("resize",(function(){t.winResize||(t.winResize=!0,requestAnimationFrame((function(){t.onResize()})))})),t.onResize()})))},enableSticky:function(){this.enable(!0)},init:function(n){n(document);var e=this;this.navBar=n("div.wy-side-scroll:first"),this.win=n(window),n(document).on("click","[data-toggle='wy-nav-top']",(function(){n("[data-toggle='wy-nav-shift']").toggleClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift")})).on("click",".wy-menu-vertical .current ul li a",(function(){var 
t=n(this);n("[data-toggle='wy-nav-shift']").removeClass("shift"),n("[data-toggle='rst-versions']").toggleClass("shift"),e.toggleCurrent(t),e.hashChange()})).on("click","[data-toggle='rst-current-version']",(function(){n("[data-toggle='rst-versions']").toggleClass("shift-up")})),n("table.docutils:not(.field-list,.footnote,.citation)").wrap("<div class='wy-table-responsive'></div>"),n("table.docutils.footnote").wrap("<div class='wy-table-responsive footnote'></div>"),n("table.docutils.citation").wrap("<div class='wy-table-responsive citation'></div>"),n(".wy-menu-vertical ul").not(".simple").siblings("a").each((function(){var t=n(this);expand=n('<button class="toctree-expand" title="Open/close menu"></button>'),expand.on("click",(function(n){return e.toggleCurrent(t),n.stopPropagation(),!1})),t.prepend(expand)}))},reset:function(){var n=encodeURI(window.location.hash)||"#";try{var e=$(".wy-menu-vertical"),t=e.find('[href="'+n+'"]');if(0===t.length){var i=$('.document [id="'+n.substring(1)+'"]').closest("div.section");0===(t=e.find('[href="#'+i.attr("id")+'"]')).length&&(t=e.find('[href="#"]'))}if(t.length>0){$(".wy-menu-vertical .current").removeClass("current").attr("aria-expanded","false"),t.addClass("current").attr("aria-expanded","true"),t.closest("li.toctree-l1").parent().addClass("current").attr("aria-expanded","true");for(let n=1;n<=10;n++)t.closest("li.toctree-l"+n).addClass("current").attr("aria-expanded","true");t[0].scrollIntoView()}}catch(n){console.log("Error expanding nav for anchor",n)}},onScroll:function(){this.winScroll=!1;var n=this.win.scrollTop(),e=n+this.winHeight,t=this.navBar.scrollTop()+(n-this.winPosition);n<0||e>this.docHeight||(this.navBar.scrollTop(t),this.winPosition=n)},onResize:function(){this.winResize=!1,this.winHeight=this.win.height(),this.docHeight=$(document).height()},hashChange:function(){this.linkScroll=!0,this.win.one("hashchange",(function(){this.linkScroll=!1}))},toggleCurrent:function(n){var 
e=n.closest("li");e.siblings("li.current").removeClass("current").attr("aria-expanded","false"),e.siblings().find("li.current").removeClass("current").attr("aria-expanded","false");var t=e.find("> ul li");t.length&&(t.removeClass("current").attr("aria-expanded","false"),e.toggleClass("current").attr("aria-expanded",(function(n,e){return"true"==e?"false":"true"})))}},"undefined"!=typeof window&&(window.SphinxRtdTheme={Navigation:n.exports.ThemeNav,StickyNav:n.exports.ThemeNav}),function(){for(var n=0,e=["ms","moz","webkit","o"],t=0;t<e.length&&!window.requestAnimationFrame;++t)window.requestAnimationFrame=window[e[t]+"RequestAnimationFrame"],window.cancelAnimationFrame=window[e[t]+"CancelAnimationFrame"]||window[e[t]+"CancelRequestAnimationFrame"];window.requestAnimationFrame||(window.requestAnimationFrame=function(e,t){var i=(new Date).getTime(),o=Math.max(0,16-(i-n)),r=window.setTimeout((function(){e(i+o)}),o);return n=i+o,r}),window.cancelAnimationFrame||(window.cancelAnimationFrame=function(n){clearTimeout(n)})}()}).call(window)},function(n,e){n.exports=jQuery},function(n,e,t){}]);
\ No newline at end of file
/*
* language_data.js
* ~~~~~~~~~~~~~~~~
*
* This script contains the language-specific data used by searchtools.js,
* namely the list of stopwords, stemmer, scorer and splitter.
*
* :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
// Words that the search code skips when it builds the list of query terms.
var stopwords = [
  "a", "and", "are", "as", "at",
  "be", "but", "by",
  "for",
  "if", "in", "into", "is", "it",
  "near", "no", "not",
  "of", "on", "or",
  "such",
  "that", "the", "their", "then", "there", "these", "they", "this", "to",
  "was", "will", "with",
];
/* Non-minified version is copied as a separate JS file, if available */
/**
 * Porter Stemmer
 *
 * Constructor function. Every instance gets its own `stemWord(word)` method,
 * which runs the suffix-stripping steps 1a, 1b, 1c, 2, 3, 4 and 5 in that
 * order and returns the resulting stem. Words shorter than three characters
 * are returned unchanged.
 */
var Stemmer = function() {
  // Step 2: suffix -> replacement
  const step2Replacements = {
    ational: 'ate',
    tional: 'tion',
    enci: 'ence',
    anci: 'ance',
    izer: 'ize',
    bli: 'ble',
    alli: 'al',
    entli: 'ent',
    eli: 'e',
    ousli: 'ous',
    ization: 'ize',
    ation: 'ate',
    ator: 'ate',
    alism: 'al',
    iveness: 'ive',
    fulness: 'ful',
    ousness: 'ous',
    aliti: 'al',
    iviti: 'ive',
    biliti: 'ble',
    logi: 'log'
  };
  // Step 3: suffix -> replacement
  const step3Replacements = {
    icate: 'ic',
    ative: '',
    alize: 'al',
    iciti: 'ic',
    ical: 'ic',
    ful: '',
    ness: ''
  };

  // Pattern fragments used to express the "measure" (m) of a stem.
  const consonant = "[^aeiou]";
  const vowel = "[aeiouy]";
  const consonantRun = consonant + "[^aeiouy]*";
  const vowelRun = vowel + "[aeiou]*";
  const optionalLead = "^(" + consonantRun + ")?";

  // None of the patterns below uses the g or y flag, so they keep no state
  // between calls and can be shared by every stemWord() invocation.
  const measureAboveZero = new RegExp(optionalLead + vowelRun + consonantRun); // [C]VC... is m>0
  const measureIsOne = new RegExp(optionalLead + vowelRun + consonantRun + "(" + vowelRun + ")?$"); // [C]VC[V] is m=1
  const measureAboveOne = new RegExp(optionalLead + vowelRun + consonantRun + vowelRun + consonantRun); // [C]VCVC... is m>1
  const containsVowel = new RegExp(optionalLead + vowel); // vowel in stem
  const consonantVowelConsonant = new RegExp("^" + consonantRun + vowel + "[^aeiouwxy]$");
  const doubledConsonant = /([^aeiouylsz])\1$/;
  const lastCharacter = /.$/;

  const pluralEs = /^(.+?)(ss|i)es$/;
  const pluralS = /^(.+?)([^s])s$/;
  const suffixEed = /^(.+?)eed$/;
  const suffixEdOrIng = /^(.+?)(ed|ing)$/;
  const wantsTrailingE = /(at|bl|iz)$/;
  const suffixY = /^(.+?)y$/;
  const step2Suffix = /^(.+?)(ational|tional|enci|anci|izer|bli|alli|entli|eli|ousli|ization|ation|ator|alism|iveness|fulness|ousness|aliti|iviti|biliti|logi)$/;
  const step3Suffix = /^(.+?)(icate|ative|alize|iciti|ical|ful|ness)$/;
  const step4Suffix = /^(.+?)(al|ance|ence|er|ic|able|ible|ant|ement|ment|ent|ou|ism|ate|iti|ous|ive|ize)$/;
  const step4Ion = /^(.+?)(s|t)(ion)$/;
  const suffixE = /^(.+?)e$/;
  const doubleL = /ll$/;

  this.stemWord = function (w) {
    if (w.length < 3) {
      return w;
    }

    // An initial "y" is upper-cased so the vowel patterns do not treat it as a
    // vowel; it is lower-cased again just before returning.
    const startsWithY = w.charAt(0) === "y";
    if (startsWithY) {
      w = "Y" + w.slice(1);
    }

    let found;

    // Step 1a
    found = pluralEs.exec(w) || pluralS.exec(w);
    if (found) {
      // both patterns span the whole word, so this equals replace(.., "$1$2")
      w = found[1] + found[2];
    }

    // Step 1b
    found = suffixEed.exec(w);
    if (found) {
      // an "eed" word never falls through to the ed/ing rule, even if unchanged
      if (measureAboveZero.test(found[1])) {
        w = w.replace(lastCharacter, "");
      }
    } else {
      found = suffixEdOrIng.exec(w);
      if (found && containsVowel.test(found[1])) {
        w = found[1];
        if (wantsTrailingE.test(w)) {
          w = w + "e";
        } else if (doubledConsonant.test(w)) {
          w = w.replace(lastCharacter, "");
        } else if (consonantVowelConsonant.test(w)) {
          w = w + "e";
        }
      }
    }

    // Step 1c
    found = suffixY.exec(w);
    if (found && containsVowel.test(found[1])) {
      w = found[1] + "i";
    }

    // Step 2
    found = step2Suffix.exec(w);
    if (found && measureAboveZero.test(found[1])) {
      w = found[1] + step2Replacements[found[2]];
    }

    // Step 3
    found = step3Suffix.exec(w);
    if (found && measureAboveZero.test(found[1])) {
      w = found[1] + step3Replacements[found[2]];
    }

    // Step 4
    found = step4Suffix.exec(w);
    if (found) {
      if (measureAboveOne.test(found[1])) {
        w = found[1];
      }
    } else {
      found = step4Ion.exec(w);
      if (found) {
        const ionStem = found[1] + found[2];
        if (measureAboveOne.test(ionStem)) {
          w = ionStem;
        }
      }
    }

    // Step 5
    found = suffixE.exec(w);
    if (found) {
      const eStem = found[1];
      const dropE =
        measureAboveOne.test(eStem) ||
        (measureIsOne.test(eStem) && !consonantVowelConsonant.test(eStem));
      if (dropE) {
        w = eStem;
      }
    }
    if (doubleL.test(w) && measureAboveOne.test(w)) {
      w = w.replace(lastCharacter, "");
    }

    // and turn initial Y back to y
    if (startsWithY) {
      w = "y" + w.slice(1);
    }
    return w;
  };
};
/*
 * Syntax-highlighting stylesheet (the token names in the trailing comments look
 * like Pygments token types). First the line-number cells/spans, then the
 * `.highlight` container, then one rule per token class; each rule's trailing
 * comment names the token type it styles.
 */
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight { background: #eeffcc; }
.highlight .c { color: #408090; font-style: italic } /* Comment */
.highlight .err { border: 1px solid #FF0000 } /* Error */
.highlight .k { color: #007020; font-weight: bold } /* Keyword */
.highlight .o { color: #666666 } /* Operator */
.highlight .ch { color: #408090; font-style: italic } /* Comment.Hashbang */
.highlight .cm { color: #408090; font-style: italic } /* Comment.Multiline */
.highlight .cp { color: #007020 } /* Comment.Preproc */
.highlight .cpf { color: #408090; font-style: italic } /* Comment.PreprocFile */
.highlight .c1 { color: #408090; font-style: italic } /* Comment.Single */
.highlight .cs { color: #408090; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #A00000 } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .gr { color: #FF0000 } /* Generic.Error */
.highlight .gh { color: #000080; font-weight: bold } /* Generic.Heading */
.highlight .gi { color: #00A000 } /* Generic.Inserted */
.highlight .go { color: #333333 } /* Generic.Output */
.highlight .gp { color: #c65d09; font-weight: bold } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #800080; font-weight: bold } /* Generic.Subheading */
.highlight .gt { color: #0044DD } /* Generic.Traceback */
.highlight .kc { color: #007020; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #007020; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #007020; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #007020 } /* Keyword.Pseudo */
.highlight .kr { color: #007020; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #902000 } /* Keyword.Type */
.highlight .m { color: #208050 } /* Literal.Number */
.highlight .s { color: #4070a0 } /* Literal.String */
.highlight .na { color: #4070a0 } /* Name.Attribute */
.highlight .nb { color: #007020 } /* Name.Builtin */
.highlight .nc { color: #0e84b5; font-weight: bold } /* Name.Class */
.highlight .no { color: #60add5 } /* Name.Constant */
.highlight .nd { color: #555555; font-weight: bold } /* Name.Decorator */
.highlight .ni { color: #d55537; font-weight: bold } /* Name.Entity */
.highlight .ne { color: #007020 } /* Name.Exception */
.highlight .nf { color: #06287e } /* Name.Function */
.highlight .nl { color: #002070; font-weight: bold } /* Name.Label */
.highlight .nn { color: #0e84b5; font-weight: bold } /* Name.Namespace */
.highlight .nt { color: #062873; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #bb60d5 } /* Name.Variable */
.highlight .ow { color: #007020; font-weight: bold } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #208050 } /* Literal.Number.Bin */
.highlight .mf { color: #208050 } /* Literal.Number.Float */
.highlight .mh { color: #208050 } /* Literal.Number.Hex */
.highlight .mi { color: #208050 } /* Literal.Number.Integer */
.highlight .mo { color: #208050 } /* Literal.Number.Oct */
.highlight .sa { color: #4070a0 } /* Literal.String.Affix */
.highlight .sb { color: #4070a0 } /* Literal.String.Backtick */
.highlight .sc { color: #4070a0 } /* Literal.String.Char */
.highlight .dl { color: #4070a0 } /* Literal.String.Delimiter */
.highlight .sd { color: #4070a0; font-style: italic } /* Literal.String.Doc */
.highlight .s2 { color: #4070a0 } /* Literal.String.Double */
.highlight .se { color: #4070a0; font-weight: bold } /* Literal.String.Escape */
.highlight .sh { color: #4070a0 } /* Literal.String.Heredoc */
.highlight .si { color: #70a0d0; font-style: italic } /* Literal.String.Interpol */
.highlight .sx { color: #c65d09 } /* Literal.String.Other */
.highlight .sr { color: #235388 } /* Literal.String.Regex */
.highlight .s1 { color: #4070a0 } /* Literal.String.Single */
.highlight .ss { color: #517918 } /* Literal.String.Symbol */
.highlight .bp { color: #007020 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #06287e } /* Name.Function.Magic */
.highlight .vc { color: #bb60d5 } /* Name.Variable.Class */
.highlight .vg { color: #bb60d5 } /* Name.Variable.Global */
.highlight .vi { color: #bb60d5 } /* Name.Variable.Instance */
.highlight .vm { color: #bb60d5 } /* Name.Variable.Magic */
.highlight .il { color: #208050 } /* Literal.Number.Integer.Long */
\ No newline at end of file
/*
* searchtools.js
* ~~~~~~~~~~~~~~~~
*
* Sphinx JavaScript utilities for the full-text search.
*
* :copyright: Copyright 2007-2023 by the Sphinx team, see AUTHORS.
* :license: BSD, see LICENSE for details.
*
*/
"use strict";
/**
 * Default result scoring table.
 *
 * Only defined when no `Scorer` exists yet, so a page can supply its own.
 * A custom scorer may additionally provide a `score` function to further
 * tweak the score of each result. It receives a result array
 * [docname, title, anchor, descr, score, filename] and returns the new
 * score, e.g.:
 *
 *   score: result => {
 *     const [docname, title, anchor, descr, score, filename] = result
 *     return score
 *   },
 */
if (typeof Scorer === "undefined") {
  // Additive scores depending on the priority of the object
  const objectPriorityScores = {
    0: 15, // used to be importantResults
    1: 5, // used to be objectResults
    2: -5, // used to be unimportantResults
  };

  var Scorer = {
    // query matches the full name of an object
    objNameMatch: 11,
    // or matches in the last dotted part of the object name
    objPartialMatch: 6,
    objPrio: objectPriorityScores,
    // Used when the priority is not in the mapping.
    objPrioDefault: 0,

    // query found in title
    title: 15,
    partialTitle: 7,

    // query found in terms
    term: 5,
    partialTerm: 2,
  };
}
/**
 * Remove every child of `element`, last child first.
 * Does nothing when `element` is null/undefined.
 */
const _removeChildren = (element) => {
  if (!element) return;
  for (let child = element.lastChild; child; child = element.lastChild) {
    element.removeChild(child);
  }
};
/**
 * Backslash-escape every regular-expression metacharacter in `string`, so the
 * result matches the input literally when used as a pattern.
 *
 * See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#escaping
 */
const _escapeRegExp = (string) => {
  const metaCharacters = /[.*+\-?^${}()|[\]\\]/g;
  // "$&" in the replacement stands for the whole matched string
  return string.replace(metaCharacters, "\\$&");
};
/**
 * Render one search result as an <li> and append it to `Search.output`.
 *
 * @param {Array} item - result tuple [docName, title, anchor, descr, score, filename]
 * @param {Iterable<string>} searchTerms - terms handed to Search.makeSearchSummary
 *
 * The list item holds a link to the document; it is followed either by the
 * description in parentheses or, when DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY
 * is set, by a summary paragraph that is appended later, once the document
 * has been fetched.
 */
const _displayItem = (item, searchTerms) => {
  const docBuilder = DOCUMENTATION_OPTIONS.BUILDER;
  const docUrlRoot = DOCUMENTATION_OPTIONS.URL_ROOT;
  const docFileSuffix = DOCUMENTATION_OPTIONS.FILE_SUFFIX;
  const docLinkSuffix = DOCUMENTATION_OPTIONS.LINK_SUFFIX;
  const showSearchSummary = DOCUMENTATION_OPTIONS.SHOW_SEARCH_SUMMARY;

  const [docName, title, anchor, descr, score, _filename] = item;

  const listItem = document.createElement("li");
  let requestUrl;
  let linkUrl;
  if (docBuilder === "dirhtml") {
    // dirhtml builder: "<dir>/index" is served as "<dir>/", a top-level "index" as ""
    let dirname = docName + "/";
    if (dirname.match(/\/index\/$/))
      dirname = dirname.substring(0, dirname.length - 6);
    else if (dirname === "index/") dirname = "";
    requestUrl = docUrlRoot + dirname;
    linkUrl = requestUrl;
  } else {
    // normal html builders
    requestUrl = docUrlRoot + docName + docFileSuffix;
    linkUrl = docName + docLinkSuffix;
  }

  const linkEl = listItem.appendChild(document.createElement("a"));
  linkEl.href = linkUrl + anchor;
  linkEl.dataset.score = score;
  // NOTE(review): title and descr are inserted as HTML; this presumably relies
  // on the search index storing them HTML-escaped - confirm before changing.
  linkEl.innerHTML = title;

  if (descr) {
    listItem.appendChild(document.createElement("span")).innerHTML =
      " (" + descr + ")";
  } else if (showSearchSummary) {
    fetch(requestUrl)
      .then((responseData) => responseData.text())
      .then((data) => {
        if (!data) return;
        // makeSearchSummary() returns null when the page has no usable text;
        // appendChild(null) would throw, so only attach a real node.
        const summary = Search.makeSearchSummary(data, searchTerms);
        if (summary) listItem.appendChild(summary);
      })
      .catch((error) => {
        // a missing summary must not break the result list
        console.warn("Could not load search summary for", requestUrl, error);
      });
  }
  Search.output.appendChild(listItem);
};
/**
 * Stop the progress animation and show the final title and status line.
 *
 * @param {number} resultCount - number of results found; a falsy value
 *   selects the "did not match any documents" message.
 */
const _finishSearch = (resultCount) => {
  Search.stopPulse();
  Search.title.innerText = _("Search Results");
  if (!resultCount)
    Search.status.innerText = Documentation.gettext(
      "Your search did not match any documents. Please make sure that all words are spelled correctly and that you've selected enough categories."
    );
  else
    // Look up the message with its literal "${resultCount}" placeholder and
    // substitute afterwards. Interpolating first (template literal) produces
    // a different string for every count, which can never match a fixed
    // catalog entry, so the message was never translated.
    // NOTE(review): assumes the catalog msgid keeps the literal placeholder -
    // confirm against the locale files; untranslated output is unchanged.
    Search.status.innerText = _(
      "Search finished, found ${resultCount} page(s) matching the search query."
    ).replace("${resultCount}", String(resultCount));
};
/**
 * Display the results one at a time, taking them from the END of `results`
 * and scheduling the next step 5 ms later; when the list is empty, finish
 * the search with `resultCount`.
 *
 * `results` is consumed (popped) in place. `results.length` is re-read on
 * every step on purpose; `resultCount` is only used for the final status.
 */
const _displayNextItem = (results, resultCount, searchTerms) => {
  if (!results.length) {
    // search finished, update title and status message
    _finishSearch(resultCount);
    return;
  }
  // results left, load the summary and display it
  const nextResult = results.pop();
  _displayItem(nextResult, searchTerms);
  const showFollowing = () => _displayNextItem(results, resultCount, searchTerms);
  setTimeout(showFollowing, 5);
};
/**
 * Default splitQuery function. Can be overridden in ``sphinx.search`` with a
 * custom function per language; it is only defined here when no
 * ``splitQuery`` exists yet.
 *
 * The query is cut at every run of characters that are not Unicode letters,
 * numbers, underscores, or emoji characters, and empty pieces are dropped.
 * This is the same as ``\W+`` in Python, preserving the surrogate pair area.
 */
if (typeof splitQuery === "undefined") {
  var splitQuery = (query) => {
    const separators = /[^\p{Letter}\p{Number}_\p{Emoji_Presentation}]+/gu;
    const pieces = query.split(separators);
    // leading/trailing separators leave empty strings behind
    return pieces.filter((piece) => piece);
  };
}
/**
 * Search Module
 *
 * State:
 *   _index         the loaded search index (null until setIndex() is called)
 *   _queued_query  query waiting for the index, or null
 *   _pulse_status  -1 while the progress animation is stopped, 0..3 otherwise
 * performSearch() additionally stores the created elements as
 * Search.title, Search.dots, Search.status and Search.output.
 */
const Search = {
  _index: null,
  _queued_query: null,
  _pulse_status: -1,

  /**
   * Return the text of the `[role="main"]` element of an HTML document string,
   * ignoring elements with class "headerlink". Returns "" (and logs a
   * warning) when the document has no such element.
   */
  htmlToText: (htmlString) => {
    const htmlElement = new DOMParser().parseFromString(htmlString, 'text/html');
    htmlElement.querySelectorAll(".headerlink").forEach((el) => { el.remove() });
    const docContent = htmlElement.querySelector('[role="main"]');
    // querySelector() yields null - never undefined - when nothing matches
    if (docContent) return docContent.textContent;
    console.warn(
      "Content block not found. Sphinx search tries to obtain it via '[role=main]'. Could you check your theme or template."
    );
    return "";
  },

  /**
   * Copy the `q` URL parameter into every `input[name="q"]` and, when it is
   * non-empty, start the search.
   */
  init: () => {
    const query = new URLSearchParams(window.location.search).get("q");
    document
      .querySelectorAll('input[name="q"]')
      .forEach((el) => (el.value = query));
    if (query) Search.performSearch(query);
  },

  /** Load the index by appending a <script src=url> element to the body. */
  loadIndex: (url) =>
    (document.body.appendChild(document.createElement("script")).src = url),

  /** Store the index and run the query that was deferred while it loaded, if any. */
  setIndex: (index) => {
    Search._index = index;
    if (Search._queued_query !== null) {
      const query = Search._queued_query;
      Search._queued_query = null;
      Search.query(query);
    }
  },

  hasIndex: () => Search._index !== null,

  deferQuery: (query) => (Search._queued_query = query),

  /** Stop the progress animation; the pending timer ends on its next tick. */
  stopPulse: () => (Search._pulse_status = -1),

  /**
   * Start the "..." progress animation in Search.dots (one step every 500 ms).
   * Does nothing when the animation is already running.
   */
  startPulse: () => {
    if (Search._pulse_status >= 0) return;

    const pulse = () => {
      Search._pulse_status = (Search._pulse_status + 1) % 4;
      Search.dots.innerText = ".".repeat(Search._pulse_status);
      window.setTimeout(() => {
        // stopPulse() resets the status to -1. Checking it here - before the
        // next increment - is what actually ends the timer chain; after the
        // "% 4" above the status can never be negative.
        if (Search._pulse_status >= 0) pulse();
      }, 500);
    };

    pulse();
  },

  /**
   * perform a search for something (or wait until index is loaded)
   */
  performSearch: (query) => {
    // create the required interface elements
    const searchText = document.createElement("h2");
    searchText.textContent = _("Searching");
    const searchSummary = document.createElement("p");
    searchSummary.classList.add("search-summary");
    searchSummary.innerText = "";
    const searchList = document.createElement("ul");
    searchList.classList.add("search");

    const out = document.getElementById("search-results");
    Search.title = out.appendChild(searchText);
    Search.dots = Search.title.appendChild(document.createElement("span"));
    Search.status = out.appendChild(searchSummary);
    Search.output = out.appendChild(searchList);

    const searchProgress = document.getElementById("search-progress");
    // Some themes don't use the search progress node
    if (searchProgress) {
      searchProgress.innerText = _("Preparing search...");
    }
    Search.startPulse();

    // index already loaded, the browser was quick!
    if (Search.hasIndex()) Search.query(query);
    else Search.deferQuery(query);
  },

  /**
   * execute search (requires search index to be loaded)
   *
   * Collects title matches, index-entry matches, object matches and
   * full-text matches, scores and de-duplicates them, and hands the list to
   * _displayNextItem().
   */
  query: (query) => {
    const filenames = Search._index.filenames;
    const docNames = Search._index.docnames;
    const titles = Search._index.titles;
    const allTitles = Search._index.alltitles;
    const indexEntries = Search._index.indexentries;

    // stem the search terms and add them to the correct list
    const stemmer = new Stemmer();
    const searchTerms = new Set();
    const excludedTerms = new Set();
    const highlightTerms = new Set();
    const objectTerms = new Set(splitQuery(query.toLowerCase().trim()));
    splitQuery(query.trim()).forEach((queryTerm) => {
      const queryTermLower = queryTerm.toLowerCase();

      // maybe skip this "word"
      // stopwords array is from language_data.js
      if (
        stopwords.indexOf(queryTermLower) !== -1 ||
        queryTerm.match(/^\d+$/)
      )
        return;

      // stem the word
      const word = stemmer.stemWord(queryTermLower);
      // select the correct list
      if (word[0] === "-") excludedTerms.add(word.slice(1));
      else {
        searchTerms.add(word);
        highlightTerms.add(queryTermLower);
      }
    });

    if (SPHINX_HIGHLIGHT_ENABLED) { // set in sphinx_highlight.js
      localStorage.setItem("sphinx_highlight_terms", [...highlightTerms].join(" "))
    }

    // array of [docname, title, anchor, descr, score, filename]
    let results = [];
    _removeChildren(document.getElementById("search-progress"));

    // trimmed like the term lists above, so surrounding whitespace in the
    // query cannot prevent a title or index-entry match
    const queryLower = query.toLowerCase().trim();
    for (const [title, foundTitles] of Object.entries(allTitles)) {
      if (title.toLowerCase().includes(queryLower) && (queryLower.length >= title.length/2)) {
        for (const [file, id] of foundTitles) {
          let score = Math.round(100 * queryLower.length / title.length)
          results.push([
            docNames[file],
            titles[file] !== title ? `${titles[file]} > ${title}` : title,
            id !== null ? "#" + id : "",
            null,
            score,
            filenames[file],
          ]);
        }
      }
    }

    // search for explicit entries in index directives
    for (const [entry, foundEntries] of Object.entries(indexEntries)) {
      if (entry.includes(queryLower) && (queryLower.length >= entry.length/2)) {
        for (const [file, id] of foundEntries) {
          let score = Math.round(100 * queryLower.length / entry.length)
          results.push([
            docNames[file],
            titles[file],
            id ? "#" + id : "",
            null,
            score,
            filenames[file],
          ]);
        }
      }
    }

    // lookup as object
    objectTerms.forEach((term) =>
      results.push(...Search.performObjectSearch(term, objectTerms))
    );

    // lookup as search terms in fulltext
    results.push(...Search.performTermsSearch(searchTerms, excludedTerms));

    // let the scorer override scores with a custom scoring function
    if (Scorer.score) results.forEach((item) => (item[4] = Scorer.score(item)));

    // now sort the results by score (in opposite order of appearance, since the
    // display function below uses pop() to retrieve items) and then
    // alphabetically
    results.sort((a, b) => {
      const leftScore = a[4];
      const rightScore = b[4];
      if (leftScore === rightScore) {
        // same score: sort alphabetically
        const leftTitle = a[1].toLowerCase();
        const rightTitle = b[1].toLowerCase();
        if (leftTitle === rightTitle) return 0;
        return leftTitle > rightTitle ? -1 : 1; // inverted is intentional
      }
      return leftScore > rightScore ? 1 : -1;
    });

    // remove duplicate search results
    // note the reversing of results, so that in the case of duplicates, the highest-scoring entry is kept
    let seen = new Set();
    results = results.reverse().reduce((acc, result) => {
      let resultStr = result.slice(0, 4).concat([result[5]]).map(v => String(v)).join(',');
      if (!seen.has(resultStr)) {
        acc.push(result);
        seen.add(resultStr);
      }
      return acc;
    }, []);

    results = results.reverse();

    // print the results
    _displayNextItem(results, results.length, searchTerms);
  },

  /**
   * search for object names
   *
   * @param {string} object - lower-cased term to look for in object names
   * @param {Set<string>} objectTerms - all object terms of the query; every
   *   other term must occur in the object's prefix/name/type/title
   * @returns {Array} result tuples [docname, fullname, anchor, descr, score, filename]
   */
  performObjectSearch: (object, objectTerms) => {
    const filenames = Search._index.filenames;
    const docNames = Search._index.docnames;
    const objects = Search._index.objects;
    const objNames = Search._index.objnames;
    const titles = Search._index.titles;

    const results = [];

    const objectSearchCallback = (prefix, match) => {
      const name = match[4]
      const fullname = (prefix ? prefix + "." : "") + name;
      const fullnameLower = fullname.toLowerCase();
      if (fullnameLower.indexOf(object) < 0) return;

      let score = 0;
      const parts = fullnameLower.split(".");

      // check for different match types: exact matches of full name or
      // "last name" (i.e. last dotted part)
      if (fullnameLower === object || parts.slice(-1)[0] === object)
        score += Scorer.objNameMatch;
      else if (parts.slice(-1)[0].indexOf(object) > -1)
        score += Scorer.objPartialMatch; // matches in last name

      const objName = objNames[match[1]][2];
      const title = titles[match[0]];

      // If more than one term searched for, we require other words to be
      // found in the name/title/description
      const otherTerms = new Set(objectTerms);
      otherTerms.delete(object);
      if (otherTerms.size > 0) {
        const haystack = `${prefix} ${name} ${objName} ${title}`.toLowerCase();
        if (
          [...otherTerms].some((otherTerm) => haystack.indexOf(otherTerm) < 0)
        )
          return;
      }

      let anchor = match[3];
      if (anchor === "") anchor = fullname;
      else if (anchor === "-") anchor = objNames[match[1]][1] + "-" + fullname;

      const descr = objName + _(", in ") + title;

      // add custom score for some objects according to scorer
      if (Scorer.objPrio.hasOwnProperty(match[2]))
        score += Scorer.objPrio[match[2]];
      else score += Scorer.objPrioDefault;

      results.push([
        docNames[match[0]],
        fullname,
        "#" + anchor,
        descr,
        score,
        filenames[match[0]],
      ]);
    };
    Object.keys(objects).forEach((prefix) =>
      objects[prefix].forEach((array) =>
        objectSearchCallback(prefix, array)
      )
    );
    return results;
  },

  /**
   * search for full-text terms in the index
   *
   * @param {Set<string>} searchTerms - stemmed terms that must all match
   * @param {Set<string>} excludedTerms - terms whose documents are dropped
   * @returns {Array} result tuples [docname, title, "", null, score, filename]
   *
   * An index entry (terms[x] / titleterms[x]) is either a single document
   * index or an array of them.
   */
  performTermsSearch: (searchTerms, excludedTerms) => {
    // prepare search
    const terms = Search._index.terms;
    const titleTerms = Search._index.titleterms;
    const filenames = Search._index.filenames;
    const docNames = Search._index.docnames;
    const titles = Search._index.titles;

    // Own-property lookup: a query word such as "constructor" must not resolve
    // to a member inherited from Object.prototype.
    const lookup = (table, key) =>
      Object.prototype.hasOwnProperty.call(table, key) ? table[key] : undefined;
    // An entry may be one document index or an array of them.
    const entryHasFile = (entry, file) =>
      entry === file || (Array.isArray(entry) && entry.includes(file));

    const scoreMap = new Map();
    const fileMap = new Map();

    // perform the search on the required terms
    searchTerms.forEach((word) => {
      const files = [];
      const exactTermFiles = lookup(terms, word);
      const exactTitleFiles = lookup(titleTerms, word);
      const arr = [
        { files: exactTermFiles, score: Scorer.term },
        { files: exactTitleFiles, score: Scorer.title },
      ];
      // add support for partial matches, but only where there is no exact
      // match ("=== undefined": document index 0 is a valid exact match)
      if (word.length > 2) {
        const escapedWord = _escapeRegExp(word);
        Object.keys(terms).forEach((term) => {
          if (term.match(escapedWord) && exactTermFiles === undefined)
            arr.push({ files: terms[term], score: Scorer.partialTerm });
        });
        Object.keys(titleTerms).forEach((term) => {
          // use the files of the matching title term, not of the (missing) word
          if (term.match(escapedWord) && exactTitleFiles === undefined)
            arr.push({ files: titleTerms[term], score: Scorer.partialTitle });
        });
      }

      // no match but word was a required one
      if (arr.every((record) => record.files === undefined)) return;

      // found search word in contents
      arr.forEach((record) => {
        if (record.files === undefined) return;

        let recordFiles = record.files;
        if (recordFiles.length === undefined) recordFiles = [recordFiles];
        files.push(...recordFiles);

        // set score for the word in each file (later records override earlier ones)
        recordFiles.forEach((file) => {
          if (!scoreMap.has(file)) scoreMap.set(file, {});
          scoreMap.get(file)[word] = record.score;
        });
      });

      // create the mapping file -> matched words; a file seen twice for the
      // same word must keep the words recorded earlier
      files.forEach((file) => {
        if (!fileMap.has(file)) fileMap.set(file, [word]);
        else if (fileMap.get(file).indexOf(word) === -1)
          fileMap.get(file).push(word);
      });
    });

    // as search terms with length < 3 are discarded
    const filteredTermCount = [...searchTerms].filter(
      (term) => term.length > 2
    ).length;

    // now check if the files don't contain excluded terms
    const results = [];
    for (const [file, wordList] of fileMap) {
      // check if all requirements are matched
      if (
        wordList.length !== searchTerms.size &&
        wordList.length !== filteredTermCount
      )
        continue;

      // ensure that none of the excluded terms is in the search result;
      // skip only this file - the remaining files still have to be checked
      if (
        [...excludedTerms].some(
          (term) =>
            entryHasFile(lookup(terms, term), file) ||
            entryHasFile(lookup(titleTerms, term), file)
        )
      )
        continue;

      // select one (max) score for the file.
      const score = Math.max(...wordList.map((w) => scoreMap.get(file)[w]));
      // add result to the result list
      results.push([
        docNames[file],
        titles[file],
        "",
        null,
        score,
        filenames[file],
      ]);
    }
    return results;
  },

  /**
   * helper function to return a node containing the
   * search summary for a given text. keywords is a list
   * of stemmed words.
   *
   * Returns null when no text could be extracted. The summary shows up to
   * 240 characters, starting 120 characters before the position of the last
   * keyword that occurs in the text (or at the start when none occurs).
   */
  makeSearchSummary: (htmlText, keywords) => {
    const text = Search.htmlToText(htmlText);
    if (text === "") return null;

    const textLower = text.toLowerCase();
    const keywordPositions = [...keywords]
      .map((k) => textLower.indexOf(k.toLowerCase()))
      .filter((i) => i > -1);
    // without this fallback the arithmetic below runs on undefined/NaN and the
    // summary gets a bogus leading "..." and no trailing one
    const actualStartPosition = keywordPositions.length
      ? keywordPositions[keywordPositions.length - 1]
      : 0;

    const startWithContext = Math.max(actualStartPosition - 120, 0);

    const top = startWithContext === 0 ? "" : "...";
    const tail = startWithContext + 240 < text.length ? "..." : "";

    const summary = document.createElement("p");
    summary.classList.add("context");
    summary.textContent =
      top + text.slice(startWithContext, startWithContext + 240).trim() + tail;

    return summary;
  },
};
// Register Search.init through _ready (a helper defined outside this section;
// presumably it defers the call until the DOM has loaded — confirm there).
_ready(Search.init);
/* Highlighting utilities for Sphinx HTML documentation. */
"use strict";
// Switch for search-term highlighting:
// SphinxHighlight.highlightSearchWords returns immediately when this is false.
const SPHINX_HIGHLIGHT_ENABLED = true;
/**
 * Walk `node` and wrap the first match of `text` in every text node.
 *
 * For a text node the lower-cased content is searched for `text` (used as
 * given, so callers pass it lower-cased). On a hit the node is cut in three:
 * the part before the match stays in `node`, the match moves into a new
 * <span> carrying `className` (or, below an <svg>, into a plain <tspan>), and
 * the rest goes into a new text node inserted after it. Text whose parent
 * already has `className` or "nohighlight" is left alone.
 *
 * Below an <svg>, a <rect> with the parent's bounding box and `className` is
 * additionally queued in `addItems` as { parent, target }; it is not inserted
 * here.
 *
 * Any other node that offers `matches` and is not a button, select or
 * textarea is descended into.
 */
const _highlight = (node, addItems, text, className) => {
  if (node.nodeType !== Node.TEXT_NODE) {
    if (node.matches && !node.matches("button, select, textarea")) {
      node.childNodes.forEach((child) =>
        _highlight(child, addItems, text, className)
      );
    }
    return;
  }

  const content = node.nodeValue;
  const container = node.parentNode;
  const matchStart = content.toLowerCase().indexOf(text);
  if (matchStart < 0) return;
  // already highlighted, or highlighting explicitly disabled for this parent
  if (container.classList.contains(className)) return;
  if (container.classList.contains("nohighlight")) return;

  const matchEnd = matchStart + text.length;
  const nearest = container.closest("body, svg, foreignObject");
  const insideSvg = Boolean(nearest) && nearest.matches("svg");

  let wrapper;
  if (insideSvg) {
    wrapper = document.createElementNS("http://www.w3.org/2000/svg", "tspan");
  } else {
    wrapper = document.createElement("span");
    wrapper.classList.add(className);
  }
  wrapper.appendChild(
    document.createTextNode(content.slice(matchStart, matchEnd))
  );

  // Put the remainder right after `node`, then the wrapper in front of it.
  const remainder = container.insertBefore(
    document.createTextNode(content.slice(matchEnd)),
    node.nextSibling
  );
  container.insertBefore(wrapper, remainder);
  node.nodeValue = content.slice(0, matchStart);

  if (!insideSvg) return;

  // The <tspan> carries no class; a <rect> with the class is queued instead.
  const rect = document.createElementNS("http://www.w3.org/2000/svg", "rect");
  const bbox = container.getBBox();
  rect.x.baseVal.value = bbox.x;
  rect.y.baseVal.value = bbox.y;
  rect.width.baseVal.value = bbox.width;
  rect.height.baseVal.value = bbox.height;
  rect.setAttribute("class", className);
  addItems.push({ parent: container, target: rect });
};
/**
 * Run _highlight over `thisNode`, then insert every element it queued
 * directly before the parent recorded alongside it.
 */
const _highlightText = (thisNode, text, className) => {
  const pending = [];
  _highlight(thisNode, pending, text, className);
  for (const { parent, target } of pending) {
    parent.insertAdjacentElement("beforebegin", target);
  }
};
/**
 * Small JavaScript module for the documentation.
 */
const SphinxHighlight = {
  /**
   * Highlight the search words found in localStorage (key
   * "sphinx_highlight_terms") or, if that is empty, in the "highlight" URL
   * query parameter. Both sources are cleared, the highlight string is
   * lower-cased and split on whitespace, and each term is highlighted with
   * the class "highlighted". If an element with id "searchbox" exists, a
   * "Hide Search Matches" link is appended to it.
   */
  highlightSearchWords: () => {
    if (!SPHINX_HIGHLIGHT_ENABLED) return; // bail if no highlight

    // get and clear terms from localstorage and the URL
    const url = new URL(window.location);
    const highlight =
      localStorage.getItem("sphinx_highlight_terms") ||
      url.searchParams.get("highlight") ||
      "";
    localStorage.removeItem("sphinx_highlight_terms");
    url.searchParams.delete("highlight");
    window.history.replaceState({}, "", url);

    // get individual terms from highlight string
    const terms = highlight.toLowerCase().split(/\s+/).filter((x) => x);
    if (terms.length === 0) return; // nothing to do

    // There should never be more than one element matching "div.body"
    const divBody = document.querySelectorAll("div.body");
    const body = divBody.length ? divBody[0] : document.querySelector("body");
    window.setTimeout(() => {
      terms.forEach((term) => _highlightText(body, term, "highlighted"));
    }, 10);

    const searchBox = document.getElementById("searchbox");
    if (searchBox === null) return;
    searchBox.appendChild(
      document
        .createRange()
        .createContextualFragment(
          '<p class="highlight-link">' +
            '<a href="javascript:SphinxHighlight.hideSearchWords()">' +
            _("Hide Search Matches") +
            "</a></p>"
        )
    );
  },

  /**
   * Hide the search marks again: remove the "hide" link(s) from the search
   * box, strip the "highlighted" class from all span.highlighted elements
   * and drop the stored terms from localStorage.
   */
  hideSearchWords: () => {
    document
      .querySelectorAll("#searchbox .highlight-link")
      .forEach((el) => el.remove());
    document
      .querySelectorAll("span.highlighted")
      .forEach((el) => el.classList.remove("highlighted"));
    localStorage.removeItem("sphinx_highlight_terms");
  },

  /**
   * Install a keydown listener that calls hideSearchWords on a plain Escape
   * key press. Nothing is installed unless
   * DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS is truthy.
   */
  initEscapeListener: () => {
    // only install a listener if it is really needed
    if (!DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS) return;

    document.addEventListener("keydown", (event) => {
      // bail for input elements; activeElement may be null when nothing has
      // focus, in which case there is no input element to protect
      const active = document.activeElement;
      if (active && BLACKLISTED_KEY_CONTROL_ELEMENTS.has(active.tagName)) return;
      // bail with special keys
      if (event.shiftKey || event.altKey || event.ctrlKey || event.metaKey) return;
      // the option is checked again at event time, as before
      if (DOCUMENTATION_OPTIONS.ENABLE_SEARCH_SHORTCUTS && event.key === "Escape") {
        SphinxHighlight.hideSearchWords();
        event.preventDefault();
      }
    });
  },
};
// Hand both entry points to _ready (defined outside this section; presumably
// it runs them once the DOM is available — confirm at its definition).
_ready(SphinxHighlight.highlightSearchWords);
_ready(SphinxHighlight.initEscapeListener);
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>3. CK docker hub &mdash; Composable Kernel (CK) documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/sphinx_highlight.js"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="4. Supported Primitives Guide" href="Supported_Primitives_Guide.html" />
<link rel="prev" title="2. CK Hello world" href="tutorial_hello_world.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html">
<img src="_static/rocm_logo.png" class="logo" alt="Logo"/>
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="Linux_Install_Guide.html">1. Getting Started Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="tutorial_hello_world.html">2. CK Hello world</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">3. CK docker hub</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#why-do-i-need-this">3.1. Why do I need this?</a></li>
<li class="toctree-l2"><a class="reference internal" href="#so-what-is-composable-kernel">3.2. So what is Composable Kernel?</a></li>
<li class="toctree-l2"><a class="reference internal" href="#and-what-is-inside">3.3. And what is inside?</a></li>
<li class="toctree-l2"><a class="reference internal" href="#which-image-is-right-for-me">3.4. Which image is right for me?</a></li>
<li class="toctree-l2"><a class="reference internal" href="#diy-starts-here">3.5. DIY starts here</a></li>
<li class="toctree-l2"><a class="reference internal" href="#license">3.6. License</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="Supported_Primitives_Guide.html">4. Supported Primitives Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="API_Reference_Guide.html">5. API Reference Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="Contributors_Guide.html">6. Contributor’s Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="Disclaimer.html">7. Disclaimer</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Composable Kernel (CK)</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active"><span class="section-number">3. </span>CK docker hub</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/dockerhub.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="ck-docker-hub">
<h1><span class="section-number">3. </span>CK docker hub<a class="headerlink" href="#ck-docker-hub" title="Permalink to this heading"></a></h1>
<p><a class="reference external" href="https://hub.docker.com/r/rocm/composable_kernel">Docker hub</a></p>
<section id="why-do-i-need-this">
<h2><span class="section-number">3.1. </span>Why do I need this?<a class="headerlink" href="#why-do-i-need-this" title="Permalink to this heading"></a></h2>
<p>To make our lives easier and bring Composable Kernel dependencies together, we recommend using docker images.</p>
</section>
<section id="so-what-is-composable-kernel">
<h2><span class="section-number">3.2. </span>So what is Composable Kernel?<a class="headerlink" href="#so-what-is-composable-kernel" title="Permalink to this heading"></a></h2>
<p>The Composable Kernel (CK) library aims to provide a programming model for writing performance-critical kernels for machine learning workloads across multiple architectures, including GPUs, CPUs, etc., through general-purpose kernel languages such as HIP C++.</p>
<p>To get the CK library:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">git</span> <span class="n">clone</span> <span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">github</span><span class="o">.</span><span class="n">com</span><span class="o">/</span><span class="n">ROCmSoftwarePlatform</span><span class="o">/</span><span class="n">composable_kernel</span><span class="o">.</span><span class="n">git</span>
</pre></div>
</div>
<p>run a docker container:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>docker run \
-it \
--privileged \
--group-add sudo \
-w /root/workspace \
-v ${PATH_TO_LOCAL_WORKSPACE}:/root/workspace \
rocm/composable_kernel:ck_ub20.04_rocm5.3_release \
/bin/bash
</pre></div>
</div>
<p>and build the CK:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">mkdir</span> <span class="n">build</span> <span class="o">&amp;&amp;</span> <span class="n">cd</span> <span class="n">build</span>
<span class="c1"># Need to specify target ID, example below is for gfx908 and gfx90a</span>
<span class="n">cmake</span> \
<span class="o">-</span><span class="n">D</span> <span class="n">CMAKE_PREFIX_PATH</span><span class="o">=/</span><span class="n">opt</span><span class="o">/</span><span class="n">rocm</span> \
<span class="o">-</span><span class="n">D</span> <span class="n">CMAKE_CXX_COMPILER</span><span class="o">=/</span><span class="n">opt</span><span class="o">/</span><span class="n">rocm</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hipcc</span> \
<span class="o">-</span><span class="n">D</span> <span class="n">CMAKE_CXX_FLAGS</span><span class="o">=</span><span class="s2">&quot;-O3&quot;</span> \
<span class="o">-</span><span class="n">D</span> <span class="n">CMAKE_BUILD_TYPE</span><span class="o">=</span><span class="n">Release</span> \
<span class="o">-</span><span class="n">D</span> <span class="n">GPU_TARGETS</span><span class="o">=</span><span class="s2">&quot;gfx908;gfx90a&quot;</span> \
<span class="o">..</span>
</pre></div>
</div>
<p>and:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">make</span> <span class="o">-</span><span class="n">j</span> <span class="n">examples</span> <span class="n">tests</span>
</pre></div>
</div>
<p>To run all the test cases, including tests and examples, run:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">make</span> <span class="n">test</span>
</pre></div>
</div>
<p>We can also run specific examples or tests like:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">./</span><span class="nb">bin</span><span class="o">/</span><span class="n">example_gemm_xdl_fp16</span>
<span class="o">./</span><span class="nb">bin</span><span class="o">/</span><span class="n">test_gemm_fp16</span>
</pre></div>
</div>
<p>For more details visit <a class="reference external" href="https://github.com/ROCmSoftwarePlatform/composable_kernel">CK GitHub repo</a>, <a class="reference external" href="https://github.com/ROCmSoftwarePlatform/composable_kernel/tree/develop/example">CK examples</a>, <a class="reference external" href="https://github.com/ROCmSoftwarePlatform/composable_kernel/tree/develop/client_example">even more CK examples</a>.</p>
</section>
<section id="and-what-is-inside">
<h2><span class="section-number">3.3. </span>And what is inside?<a class="headerlink" href="#and-what-is-inside" title="Permalink to this heading"></a></h2>
<p>The docker images have everything you need for running CK including:</p>
<ul class="simple">
<li><p><a class="reference external" href="https://www.amd.com/en/graphics/servers-solutions-rocm">ROCm</a></p></li>
<li><p><a class="reference external" href="https://cmake.org/">CMake</a></p></li>
<li><p><a class="reference external" href="https://github.com/RadeonOpenCompute/llvm-project">Compiler</a></p></li>
</ul>
</section>
<section id="which-image-is-right-for-me">
<h2><span class="section-number">3.4. </span>Which image is right for me?<a class="headerlink" href="#which-image-is-right-for-me" title="Permalink to this heading"></a></h2>
<p>Let’s take a look at the image naming, for example “ck_ub20.04_rocm5.4_release”. The image specs are:</p>
<ul class="simple">
<li><p>“ck” - made for running Composable Kernel</p></li>
<li><p>“ub20.04” - based on Ubuntu 20.04</p></li>
<li><p>“rocm5.4” - ROCm platform version 5.4</p></li>
<li><p>“release” - compiler version is release</p></li>
</ul>
<p>So just pick the right image for your project dependencies and you’re all set.</p>
</section>
<section id="diy-starts-here">
<h2><span class="section-number">3.5. </span>DIY starts here<a class="headerlink" href="#diy-starts-here" title="Permalink to this heading"></a></h2>
<p>If you need to customize a docker image or just can’t stop tinkering, feel free to adjust the <a class="reference external" href="https://github.com/ROCmSoftwarePlatform/composable_kernel/blob/develop/Dockerfile">Dockerfile</a> for your needs.</p>
</section>
<section id="license">
<h2><span class="section-number">3.6. </span>License<a class="headerlink" href="#license" title="Permalink to this heading"></a></h2>
<p>CK is released under the MIT <a class="reference external" href="https://github.com/ROCmSoftwarePlatform/composable_kernel/blob/develop/LICENSE">license</a>.</p>
</section>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="tutorial_hello_world.html" class="btn btn-neutral float-left" title="2. CK Hello world" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="Supported_Primitives_Guide.html" class="btn btn-neutral float-right" title="4. Supported Primitives Guide" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2018-2023, Advanced Micro Devices.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>
\ No newline at end of file
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Index &mdash; Composable Kernel (CK) documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/sphinx_highlight.js"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="#" />
<link rel="search" title="Search" href="search.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html">
<img src="_static/rocm_logo.png" class="logo" alt="Logo"/>
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="Linux_Install_Guide.html">1. Getting Started Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="tutorial_hello_world.html">2. CK Hello world</a></li>
<li class="toctree-l1"><a class="reference internal" href="dockerhub.html">3. CK docker hub</a></li>
<li class="toctree-l1"><a class="reference internal" href="Supported_Primitives_Guide.html">4. Supported Primitives Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="API_Reference_Guide.html">5. API Reference Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="Contributors_Guide.html">6. Contributor’s Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="Disclaimer.html">7. Disclaimer</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Composable Kernel (CK)</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Index</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<h1 id="index">Index</h1>
<div class="genindex-jumpbox">
<a href="#C"><strong>C</strong></a>
| <a href="#D"><strong>D</strong></a>
</div>
<h2 id="C">C</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="API_Reference_Guide.html#_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E">ck::BlockwiseGemmXdlops_v2 (C++ struct)</a>
</li>
<li><a href="API_Reference_Guide.html#_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE">ck::BlockwiseSoftmax (C++ struct)</a>
</li>
</ul></td>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="API_Reference_Guide.html#_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE">ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle (C++ struct)</a>
</li>
<li><a href="API_Reference_Guide.html#_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E">ck::ThreadGroupTensorSliceTransfer_v4r1 (C++ struct)</a>
</li>
<li><a href="API_Reference_Guide.html#_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE">ck::ThreadwiseTensorSliceTransfer_StaticToStatic (C++ struct)</a>
</li>
</ul></td>
</tr></table>
<h2 id="D">D</h2>
<table style="width: 100%" class="indextable genindextable"><tr>
<td style="width: 33%; vertical-align: top;"><ul>
<li><a href="API_Reference_Guide.html#_CPPv49DeviceMem">DeviceMem (C++ struct)</a>
</li>
</ul></td>
</tr></table>
</div>
</div>
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2018-2023, Advanced Micro Devices.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>
\ No newline at end of file
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Composable Kernel User Guide &mdash; Composable Kernel (CK) documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/sphinx_highlight.js"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="1. Getting Started Guide" href="Linux_Install_Guide.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="#">
<img src="_static/rocm_logo.png" class="logo" alt="Logo"/>
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="Linux_Install_Guide.html">1. Getting Started Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="tutorial_hello_world.html">2. CK Hello world</a></li>
<li class="toctree-l1"><a class="reference internal" href="dockerhub.html">3. CK docker hub</a></li>
<li class="toctree-l1"><a class="reference internal" href="Supported_Primitives_Guide.html">4. Supported Primitives Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="API_Reference_Guide.html">5. API Reference Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="Contributors_Guide.html">6. Contributor’s Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="Disclaimer.html">7. Disclaimer</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="#">Composable Kernel (CK)</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="#" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Composable Kernel User Guide</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/index.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="composable-kernel-user-guide">
<h1>Composable Kernel User Guide<a class="headerlink" href="#composable-kernel-user-guide" title="Permalink to this heading"></a></h1>
<div class="toctree-wrapper compound">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="Linux_Install_Guide.html">1. Getting Started Guide</a><ul>
<li class="toctree-l2"><a class="reference internal" href="Linux_Install_Guide.html#introduction">1.1. Introduction</a><ul>
<li class="toctree-l3"><a class="reference internal" href="Linux_Install_Guide.html#documentation-roadmap">1.1.1. Documentation Roadmap</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="tutorial_hello_world.html">2. CK Hello world</a><ul>
<li class="toctree-l2"><a class="reference internal" href="tutorial_hello_world.html#motivation">2.1. Motivation</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorial_hello_world.html#description">2.2. Description</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorial_hello_world.html#hardware-targets">2.3. Hardware targets</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorial_hello_world.html#build-the-library">2.4. Build the library</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorial_hello_world.html#run-examples-and-tests">2.5. Run examples and tests</a></li>
<li class="toctree-l2"><a class="reference internal" href="tutorial_hello_world.html#summary">2.6. Summary</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="dockerhub.html">3. CK docker hub</a><ul>
<li class="toctree-l2"><a class="reference internal" href="dockerhub.html#why-do-i-need-this">3.1. Why do I need this?</a></li>
<li class="toctree-l2"><a class="reference internal" href="dockerhub.html#so-what-is-composable-kernel">3.2. So what is Composable Kernel?</a></li>
<li class="toctree-l2"><a class="reference internal" href="dockerhub.html#and-what-is-inside">3.3. And what is inside?</a></li>
<li class="toctree-l2"><a class="reference internal" href="dockerhub.html#which-image-is-right-for-me">3.4. Which image is right for me?</a></li>
<li class="toctree-l2"><a class="reference internal" href="dockerhub.html#diy-starts-here">3.5. DIY starts here</a></li>
<li class="toctree-l2"><a class="reference internal" href="dockerhub.html#license">3.6. License</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="Supported_Primitives_Guide.html">4. Supported Primitives Guide</a><ul>
<li class="toctree-l2"><a class="reference internal" href="Supported_Primitives_Guide.html#softmax">4.1. Softmax</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="API_Reference_Guide.html">5. API Reference Guide</a><ul>
<li class="toctree-l2"><a class="reference internal" href="API_Reference_Guide.html#introduction">5.1. Introduction</a></li>
<li class="toctree-l2"><a class="reference internal" href="API_Reference_Guide.html#using-ck-api">5.2. Using CK API</a></li>
<li class="toctree-l2"><a class="reference internal" href="API_Reference_Guide.html#ck-datatypes">5.3. CK Datatypes</a><ul>
<li class="toctree-l3"><a class="reference internal" href="API_Reference_Guide.html#devicemem">5.3.1. DeviceMem</a></li>
<li class="toctree-l3"><a class="reference internal" href="API_Reference_Guide.html#kernels-for-flashattention">5.3.2. Kernels For Flashattention</a></li>
</ul>
</li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="Contributors_Guide.html">6. Contributor’s Guide</a><ul>
<li class="toctree-l2"><a class="reference internal" href="Contributors_Guide.html#pull-request-guidelines">6.1. Pull-request guidelines</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="Disclaimer.html">7. Disclaimer</a><ul>
<li class="toctree-l2"><a class="reference internal" href="Disclaimer.html#amd-s-standard-legal-disclaimer">7.1. AMD’s standard legal Disclaimer</a></li>
<li class="toctree-l2"><a class="reference internal" href="Disclaimer.html#third-party-disclaimer">7.2. Third Party Disclaimer</a></li>
</ul>
</li>
</ul>
</div>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="Linux_Install_Guide.html" class="btn btn-neutral float-right" title="1. Getting Started Guide" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2018-2023, Advanced Micro Devices.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>
\ No newline at end of file
File added
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>Search &mdash; Composable Kernel (CK) documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/sphinx_highlight.js"></script>
<script src="_static/js/theme.js"></script>
<script src="_static/searchtools.js"></script>
<script src="_static/language_data.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="#" />
</head>
<body class="wy-body-for-nav">
<!-- Page layout wrapper: the sidebar navigation comes first, followed by the main content section. -->
<div class="wy-grid-for-nav">
<!-- Sidebar: logo link to the index page, the search form, and the table of contents. -->
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html">
<img src="_static/rocm_logo.png" class="logo" alt="Logo"/>
</a>
<div role="search">
<!-- The form submits via GET to this same page (action="#"); the query is sent as "q"
     together with the two hidden fields below. -->
<form id="rtd-search-form" class="wy-form" action="#" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<!-- Top-level table of contents: one entry per numbered documentation page. -->
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul>
<li class="toctree-l1"><a class="reference internal" href="Linux_Install_Guide.html">1. Getting Started Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="tutorial_hello_world.html">2. CK Hello world</a></li>
<li class="toctree-l1"><a class="reference internal" href="dockerhub.html">3. CK docker hub</a></li>
<li class="toctree-l1"><a class="reference internal" href="Supported_Primitives_Guide.html">4. Supported Primitives Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="API_Reference_Guide.html">5. API Reference Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="Contributors_Guide.html">6. Contributor’s Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="Disclaimer.html">7. Disclaimer</a></li>
</ul>
</div>
</div>
</nav>
<!-- Main content section; it starts with the top bar labelled as the mobile navigation menu. -->
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Composable Kernel (CK)</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<!-- Breadcrumbs: a home link followed by the current page ("Search"). -->
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active">Search</li>
<li class="wy-breadcrumbs-aside">
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<!-- Fallback warning, rendered only when JavaScript is disabled. -->
<noscript>
<div id="fallback" class="admonition warning">
<p class="last">
Please activate JavaScript to enable the search functionality.
</p>
</div>
</noscript>
<!-- Empty container in the static markup.
     NOTE(review): presumably filled with results by searchtools.js at runtime; confirm. -->
<div id="search-results">
</div>
</div>
</div>
<!-- Footer: copyright notice and build/theme attribution links. -->
<footer>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2018-2023, Advanced Micro Devices.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
// Once the DOM is ready, switch on the theme's navigation behaviour.
// NOTE(review): the meaning of the `true` argument is defined by
// SphinxRtdTheme.Navigation.enable (not visible here) — confirm before changing it.
jQuery(function enableThemeNavigation() {
  SphinxRtdTheme.Navigation.enable(true);
});
</script>
<script>
// Ask Sphinx's search tools to load the search index once the DOM is ready.
//
// This deliberately uses the native DOM-ready mechanism instead of jQuery(...):
// none of the scripts listed in this page's head is jquery.js, and if the
// global `jQuery` is missing the original call throws a ReferenceError, so the
// index is never requested and search returns nothing. Relying only on
// `document` keeps the loader working whether or not jQuery is present.
(function () {
  function loadSearchIndex() {
    Search.loadIndex("searchindex.js");
  }

  if (document.readyState === "loading") {
    // The document is still being parsed: wait until the DOM is complete.
    document.addEventListener("DOMContentLoaded", loadSearchIndex);
  } else {
    // The DOM is already available (e.g. the script was injected late).
    loadSearchIndex();
  }
})();
</script>
<script id="searchindexloader"></script>
</body>
</html>
\ No newline at end of file
Search.setIndex({"docnames": ["API_Reference_Guide", "Contributors_Guide", "Disclaimer", "Linux_Install_Guide", "Supported_Primitives_Guide", "dockerhub", "index", "tutorial_hello_world"], "filenames": ["API_Reference_Guide.rst", "Contributors_Guide.rst", "Disclaimer.rst", "Linux_Install_Guide.rst", "Supported_Primitives_Guide.rst", "dockerhub.rst", "index.rst", "tutorial_hello_world.rst"], "titles": ["<span class=\"section-number\">5. </span>API Reference Guide", "<span class=\"section-number\">6. </span>Contributor\u2019s Guide", "<span class=\"section-number\">7. </span>Disclaimer", "<span class=\"section-number\">1. </span>Getting Started Guide", "<span class=\"section-number\">4. </span>Supported Primitives Guide", "<span class=\"section-number\">3. </span>CK docker hub", "Composable Kernel User Guide", "<span class=\"section-number\">2. </span>CK Hello world"], "terms": {"thi": [0, 2, 3, 4, 6, 7], "document": [0, 2, 4, 6], "contain": [0, 2, 3, 4, 5, 7], "detail": [0, 4, 5, 7], "compos": [0, 3, 4, 7], "librari": [0, 5, 6], "introduc": 0, "some": [0, 7], "kei": 0, "design": 0, "principl": 0, "ar": [0, 2, 5, 7], "write": [0, 5], "new": [0, 2, 4, 7], "class": 0, "extend": 0, "function": [0, 7], "section": [0, 7], "describ": 0, "how": [0, 7], "struct": 0, "store": 0, "data": [0, 7], "gpu": [0, 5, 7], "devic": [0, 2, 7], "memori": 0, "The": [0, 2, 3, 5, 7], "algorithm": [0, 4], "i": [0, 2, 3, 4, 6, 7], "defin": 0, "dao": 0, "et": 0, "al": 0, "dfe": 0, "22": 0, "list": [0, 3, 7], "implement": [0, 4], "gridwis": 0, "templat": 0, "typenam": 0, "floatab": 0, "floatgemmacc": 0, "floatcshuffl": 0, "floatc": 0, "aelementwiseoper": 0, "belementwiseoper": 0, "accelementwiseoper": 0, "b1elementwiseoper": 0, "celementwiseoper": 0, "inmemorydataoperationenum": 0, "cglobalmemorydataoper": 0, "agriddesc_ak0_m_ak1": 0, "bgriddesc_bk0_n_bk1": 0, "b1griddesc_bk0_n_bk1": 0, "cgriddesc_m_n": 0, "index_t": 0, "numgemmkprefetchstag": 0, "blocksiz": 0, "mperblock": 0, "nperblock": 0, 
"kperblock": 0, "gemm1nperblock": 0, "gemm1kperblock": 0, "ak1valu": 0, "bk1valu": 0, "b1k1valu": 0, "mperxdl": 0, "nperxdl": 0, "mxdlperwav": 0, "nxdlperwav": 0, "gemm1nxdlperwav": 0, "ablocktransferthreadclusterlengths_ak0_m_ak1": 0, "ablocktransferthreadclusterarrangeord": 0, "ablocktransfersrcaccessord": 0, "ablocktransfersrcvectordim": 0, "ablocktransfersrcscalarpervector": 0, "ablocktransferdstscalarpervector_ak1": 0, "bool": 0, "athreadtransfersrcresetcoordinateafterrun": 0, "ablockldsextram": 0, "bblocktransferthreadclusterlengths_bk0_n_bk1": 0, "bblocktransferthreadclusterarrangeord": 0, "bblocktransfersrcaccessord": 0, "bblocktransfersrcvectordim": 0, "bblocktransfersrcscalarpervector": 0, "bblocktransferdstscalarpervector_bk1": 0, "bthreadtransfersrcresetcoordinateafterrun": 0, "bblockldsextran": 0, "b1blocktransferthreadclusterlengths_bk0_n_bk1": 0, "b1blocktransferthreadclusterarrangeord": 0, "b1blocktransfersrcaccessord": 0, "b1blocktransfersrcvectordim": 0, "b1blocktransfersrcscalarpervector": 0, "b1blocktransferdstscalarpervector_bk1": 0, "b1threadtransfersrcresetcoordinateafterrun": 0, "b1blockldsextran": 0, "cshufflemxdlperwavepershuffl": 0, "cshufflenxdlperwavepershuffl": 0, "cshuffleblocktransferclusterlengths_mblock_mperblock_nblock_nperblock": 0, "cshuffleblocktransferscalarpervector_nperblock": 0, "loopschedul": [0, 7], "loopsch": 0, "padn": 0, "maskoutuppertriangl": 0, "pipelinevers": [0, 7], "pipelinev": 0, "v1": [0, 7], "gridwisebatchedgemmsoftmaxgemm_xdl_cshuffl": 0, "gemm": 0, "softmax": [0, 6], "fusion": 0, "blockwis": 0, "threadgroup": 0, "srcelementwiseoper": 0, "dstelementwiseoper": 0, "dstinmemop": 0, "blockslicelength": 0, "threadclusterlength": 0, "threadclusterarrangeord": 0, "srcdata": 0, "dstdata": 0, "srcdesc": 0, "dstdesc": 0, "srcdimaccessord": 0, "dstdimaccessord": 0, "srcvectordim": 0, "dstvectordim": 0, "srcscalarpervector": 0, "dstscalarpervector": 0, "srcscalarstrideinvector": 0, "dstscalarstrideinvector": 0, 
"threadtransfersrcresetcoordinateafterrun": 0, "threadtransferdstresetcoordinateafterrun": 0, "numthreadscratch": 0, "1": [0, 4, 7], "threadgrouptensorslicetransfer_v4r1": 0, "transfer": 0, "version": [0, 2, 5, 7], "doe": [0, 7], "follow": [0, 3, 4], "thing": 0, "avoid": 0, "scratch": 0, "issu": 0, "staticallyindexedarrai": 0, "instead": 0, "c": [0, 2, 5], "arrai": 0, "thread": 0, "buffer": 0, "threadwisetensorslicetransfer_v3": 0, "keep": 0, "tensor": [0, 7], "descriptor": 0, "run": [0, 4, 5, 6], "construct": 0, "coordin": [0, 7], "floatacc": 0, "atiledesc": 0, "btiledesc": 0, "ammatiledesc": 0, "bmmatiledesc": 0, "mrepeat": 0, "nrepeat": 0, "kpack": 0, "transposec": 0, "fals": 0, "ammakstrid": 0, "xdlopsgemm": 0, "k0perxdlop": 0, "bmmakstrid": 0, "blockwisegemmxdlops_v2": 0, "support": [0, 6, 7], "regular": 0, "xdl": 0, "output": [0, 4, 7], "m2_m3_m4_m2": 0, "transpos": 0, "m2_n2_n3_n4": 0, "decoupl": 0, "input": 0, "tile": [0, 4, 7], "mma": 0, "order": [0, 3], "both": 0, "vgpr": 0, "ld": 0, "sourc": 0, "configur": [0, 7], "k": [0, 4], "index": 0, "start": [0, 6, 7], "posit": 0, "step": 0, "size": [0, 4], "after": 0, "each": 0, "fma": 0, "instruct": [0, 3], "accdatatyp": 0, "threadmap_m_k": 0, "threadclusterdesc_m_k": 0, "threadslicedesc_m_k": 0, "ignorenan": 0, "blockwisesoftmax": 0, "paramet": [0, 7], "block": [0, 4], "accumul": 0, "type": [0, 7], "id": [0, 5], "m_k": 0, "threadwis": 0, "cluster": 0, "slice": 0, "flag": [0, 7], "ignor": 0, "nan": 0, "default": [0, 7], "elementwiseoper": 0, "slicelength": 0, "dimaccessord": 0, "enable_if": 0, "isknownatcompiletim": 0, "threadwisetensorslicetransfer_statictostat": 0, "do": [0, 6, 7], "NOT": 0, "involv": 0, "ani": [0, 2], "staticbuff": 0, "tri": 0, "daniel": 0, "y": [0, 4], "fu": 0, "stefano": 0, "ermon": 0, "atri": 0, "rudra": 0, "christoph": 0, "r": [0, 2, 7], "\u00e9": 0, "fast": [0, 7], "effici": [0, 7], "exact": 0, "attent": [0, 7], "io": 0, "awar": 0, "arxiv": 0, "preprint": 0, "2205": 0, "14135": 0, "2022": 
0, "todo": [1, 3], "inform": 2, "present": 2, "purpos": [2, 5], "onli": [2, 7], "mai": 2, "technic": [2, 7], "inaccuraci": 2, "omiss": 2, "typograph": 2, "error": 2, "herein": 2, "subject": 2, "chang": [2, 7], "render": 2, "inaccur": 2, "mani": [2, 7], "reason": 2, "includ": [2, 5, 7], "limit": 2, "product": 2, "roadmap": [2, 6], "compon": [2, 7], "motherboard": 2, "model": [2, 5, 7], "releas": [2, 5, 7], "differ": [2, 7], "between": 2, "manufactur": 2, "softwar": 2, "bio": 2, "flash": 2, "firmwar": 2, "upgrad": 2, "like": [2, 5, 7], "comput": [2, 4, 7], "system": [2, 7], "ha": [2, 7], "risk": 2, "secur": 2, "vulner": 2, "cannot": 2, "complet": 2, "prevent": 2, "mitig": 2, "assum": 2, "oblig": 2, "updat": [2, 4], "otherwis": 2, "correct": 2, "revis": 2, "howev": 2, "reserv": 2, "right": [2, 6, 7], "make": [2, 5, 7], "from": [2, 4, 7], "time": [2, 4, 7], "content": 2, "hereof": 2, "without": 2, "notifi": 2, "person": 2, "provid": [2, 5], "AS": 2, "NO": 2, "represent": 2, "OR": 2, "warranti": 2, "WITH": 2, "respect": 2, "TO": 2, "THE": 2, "AND": 2, "respons": 2, "FOR": 2, "THAT": 2, "appear": 2, "IN": 2, "specif": [2, 5], "impli": 2, "OF": 2, "non": 2, "infring": 2, "merchant": 2, "fit": [2, 7], "particular": 2, "event": 2, "WILL": 2, "BE": 2, "liabl": 2, "relianc": 2, "direct": 2, "indirect": 2, "special": 2, "other": [2, 7], "consequenti": 2, "damag": 2, "aris": 2, "us": [2, 3, 5, 6, 7], "even": [2, 5], "IF": 2, "expressli": 2, "advis": 2, "possibl": [2, 7], "SUCH": 2, "arrow": 2, "logo": 2, "radeon": [2, 7], "ryzen": 2, "epyc": 2, "combin": 2, "thereof": 2, "trademark": 2, "advanc": 2, "micro": 2, "inc": 2, "name": [2, 5], "public": 2, "identif": 2, "compani": 2, "googl": 2, "regist": 2, "llc": 2, "pcie": 2, "pci": 2, "sig": 2, "corpor": 2, "linux": 2, "linu": 2, "torvald": 2, "u": 2, "countri": 2, "ubuntu": [2, 5, 7], "canon": 2, "ltd": 2, "2023": 2, "all": [2, 5, 7], "licens": [2, 6], "you": [2, 5, 7], "directli": 2, "own": 2, "link": 2, "A": 2, "kind": 2, 
"done": [2, 7], "AT": 2, "your": [2, 5, 7], "sole": 2, "discret": 2, "under": [2, 5], "circumst": 2, "instal": 3, "contribut": 3, "kernel": [3, 4, 7], "ck": [3, 4, 6], "suggest": 3, "read": [3, 7], "In": [4, 7], "contrast": 4, "api": [4, 6], "refer": [4, 6], "an": [4, 7], "introduct": [4, 6, 7], "math": 4, "which": [4, 6, 7], "underpin": 4, "For": [4, 5, 6, 7], "vector": 4, "x": 4, "2": [4, 7], "ldot": 4, "t": [4, 5, 7], "b": 4, "we": [4, 5, 7], "can": [4, 5, 7], "decompos": 4, "concaten": 4, "begin": 4, "align": 4, "m": [4, 7], "max": 4, "f": 4, "exp": 4, "z": 4, "operatornam": 4, "end": [4, 7], "where": 4, "j": [4, 5, 7], "x_1": 4, "x_b": 4, "scalar": 4, "matrix": [4, 7], "t_r": 4, "t_c": 4, "x_": 4, "ij": 4, "b_r": 4, "b_c": 4, "row": 4, "wise": 4, "calcul": 4, "tild": 4, "_": 4, "rowmax": 4, "p": [4, 7], "rowsum": 4, "p_": 4, "If": [4, 5, 7], "initi": [4, 7], "sum": 4, "first": [4, 7], "column": 4, "m_i": 4, "i1": 4, "z_i": 4, "diag": 4, "els": 4, "_i": 4, "y_": 4, "ik": 4, "z_": 4, "reset": 4, "variabl": 4, "To": [5, 7], "our": [5, 7], "live": [5, 7], "easier": [5, 7], "bring": 5, "depend": [5, 7], "togeth": 5, "recommend": 5, "aim": [5, 7], "program": 5, "perform": [5, 7], "critic": 5, "machin": [5, 7], "learn": [5, 7], "workload": 5, "across": 5, "multipl": [5, 7], "architectur": [5, 7], "cpu": [5, 7], "etc": 5, "through": 5, "gener": [5, 7], "languag": 5, "hip": 5, "get": [5, 6, 7], "git": [5, 7], "clone": [5, 7], "http": [5, 7], "github": [5, 7], "com": [5, 7], "rocmsoftwareplatform": [5, 7], "composable_kernel": [5, 7], "privileg": [5, 7], "group": [5, 7], "add": [5, 7], "sudo": [5, 7], "w": [5, 7], "root": [5, 7], "workspac": [5, 7], "v": [5, 7], "path_to_local_workspac": 5, "rocm": [5, 7], "ck_ub20": [5, 7], "04_rocm5": [5, 7], "3_releas": [5, 7], "bin": [5, 7], "bash": [5, 7], "build": [5, 6], "mkdir": [5, 7], "cd": [5, 7], "specifi": 5, "target": [5, 6], "exampl": [5, 6], "below": 5, "gfx908": [5, 7], "gfx90a": [5, 7], "cmake": [5, 7], "d": [5, 7], 
"cmake_prefix_path": [5, 7], "opt": [5, 7], "cmake_cxx_compil": [5, 7], "hipcc": [5, 7], "cmake_cxx_flag": [5, 7], "o3": [5, 7], "cmake_build_typ": [5, 7], "gpu_target": [5, 7], "test": [5, 6], "case": [5, 7], "also": [5, 7], "example_gemm_xdl_fp16": [5, 7], "test_gemm_fp16": [5, 7], "more": [5, 7], "visit": [5, 7], "repo": [5, 7], "have": [5, 7], "everyth": [5, 7], "compil": [5, 7], "let": [5, 7], "": [5, 6, 7], "take": 5, "look": [5, 7], "4_releas": 5, "spec": 5, "made": 5, "ub20": 5, "04": [5, 7], "base": [5, 7], "20": [5, 7], "rocm5": 5, "4": [5, 7], "platform": 5, "5": 5, "just": 5, "pick": [5, 7], "project": [5, 7], "re": 5, "set": 5, "custom": 5, "stop": 5, "tinker": 5, "feel": 5, "free": 5, "adjust": [5, 7], "dockerfil": 5, "mit": 5, "hello": 6, "world": 6, "motiv": 6, "descript": 6, "hardwar": 6, "summari": 6, "docker": [6, 7], "hub": 6, "why": 6, "need": [6, 7], "so": [6, 7], "what": 6, "And": 6, "insid": [6, 7], "imag": [6, 7], "me": 6, "dii": 6, "here": [6, 7], "primit": 6, "datatyp": 6, "devicemem": 6, "flashattent": 6, "contributor": 6, "pull": 6, "request": 6, "guidelin": 6, "disclaim": 6, "amd": [6, 7], "standard": 6, "legal": 6, "third": 6, "parti": 6, "tutori": 7, "engin": 7, "deal": 7, "artifici": 7, "intellig": 7, "who": 7, "would": 7, "optim": 7, "pipelin": 7, "squeez": 7, "everi": 7, "drop": 7, "ad": 7, "approach": 7, "latest": 7, "doesn": 7, "bleed": 7, "edg": 7, "featur": 7, "reproduc": 7, "now": 7, "forev": 7, "dure": 7, "sai": 7, "futur": 7, "go": 7, "depth": 7, "breadth": 7, "familiar": 7, "tool": 7, "wai": 7, "integr": 7, "modern": 7, "ai": 7, "technologi": 7, "solv": 7, "problem": 7, "imagin": 7, "field": 7, "craft": 7, "workflow": 7, "still": 7, "challeng": 7, "one": 7, "heavi": 7, "lift": 7, "collect": 7, "oper": 7, "creat": 7, "ones": 7, "requir": 7, "major": 7, "neural": 7, "network": 7, "convolut": 7, "contract": 7, "reduct": 7, "modul": 7, "varieti": 7, "activ": 7, "fuse": 7, "almost": 7, "reach": 7, "speed": 7, "light": 7, 
"acceler": 7, "abil": 7, "layer": 7, "structur": 7, "transform": 7, "low": 7, "precis": 7, "fp16": 7, "bf16": 7, "int8": 7, "int4": 7, "excit": 7, "benchmark": 7, "result": 7, "awesom": 7, "blog": 7, "post": 7, "fulli": 7, "gfx1030": 7, "check": 7, "hand": 7, "decid": 7, "instinct": 7, "mi100": 7, "mi210": 7, "mi250": 7, "mi250x": 7, "pro": 7, "v620": 7, "w6800": 7, "w6800x": 7, "duo": 7, "w6900x": 7, "rx": 7, "6800": 7, "xt": 7, "6900": 7, "xtx": 7, "6950": 7, "There": 7, "cloud": 7, "option": 7, "find": 7, "don": 7, "rebas": 7, "checkout": 7, "tutorial_hello_world": 7, "prepar": 7, "necessari": 7, "v5": 7, "3": 7, "current": 7, "folder": 7, "home": 7, "line": 7, "path": 7, "navig": 7, "directori": 7, "previou": 7, "talk": 7, "about": 7, "onc": 7, "build_dev": 7, "off": 7, "went": 7, "well": 7, "up": 7, "file": 7, "been": 7, "written": 7, "final": 7, "smooth": 7, "ll": 7, "see": 7, "scan": 7, "100": 7, "built": 7, "ctest": 7, "n": 7, "them": 7, "separ": 7, "execut": 7, "argument": 7, "mean": 7, "want": 7, "mode": 7, "verifi": 7, "matric": 7, "integ": 7, "plai": 7, "around": 7, "goe": 7, "should": 7, "someth": 7, "a_m_k": 7, "dim": 7, "length": 7, "3840": 7, "4096": 7, "stride": 7, "b_k_n": 7, "c_m_n": 7, "launch_and_time_kernel": 7, "grid_dim": 7, "480": 7, "block_dim": 7, "256": 7, "warm": 7, "10": 7, "perf": 7, "10017": 7, "117": 7, "tflop": 7, "87": 7, "6854": 7, "gb": 7, "devicegemmxdl": 7, "128": 7, "8": 7, "32": 7, "numprefetch": 7, "meanwhil": 7, "But": 7, "panic": 7, "example_gemm_dl_fp16": 7, "nice": 7, "similar": 7, "arg": 7, "a_grid_desc_k0_m0_m1_k1_": 7, "2048": 7, "b_grid_desc_k0_n0_n1_k1_": 7, "c_grid_desc_m_n_": 7, "960": 7, "65695": 7, "35": 7, "234": 7, "26": 7, "3797": 7, "devicegemmdl": 7, "16": 7, "Or": 7, "121": 7, "pass": 7, "51": 7, "81": 7, "sec": 7, "0": 7, "fail": 7, "out": 7, "took": 7, "ran": 7, "stai": 7, "tune": 7, "next": 7, "config": 7, "best": 7, "task": 7, "forget": 7, "switch": 7, "instanc": 7, "launch": 7, "better": 7, "spend": 
7, "monei": 7, "sure": 7}, "objects": {"": [[0, 0, 1, "_CPPv49DeviceMem", "DeviceMem"], [0, 0, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::AMmaKStride"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::AMmaTileDesc"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::ATileDesc"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::BMmaKStride"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::BMmaTileDesc"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::BTileDesc"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::BlockSize"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::FloatAB"], [0, 1, 1, 
"_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::FloatAcc"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::KPack"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::KPerBlock"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::MPerBlock"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::MPerXDL"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::MRepeat"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::NPerBlock"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::NPerXDL"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::NRepeat"], [0, 1, 1, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E", "ck::BlockwiseGemmXdlops_v2::TransposeC"], [0, 0, 1, 
"_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::AccDataType"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::BlockSize"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::IgnoreNaN"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::ThreadClusterDesc_M_K"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::ThreadMap_M_K"], [0, 1, 1, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE", "ck::BlockwiseSoftmax::ThreadSliceDesc_M_K"], [0, 0, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockLdsExtraM"], [0, 1, 1, 
"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferDstScalarPerVector_AK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferSrcAccessOrder"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferSrcScalarPerVector"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", 
"ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferSrcVectorDim"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferThreadClusterArrangeOrder"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::ABlockTransferThreadClusterLengths_AK0_M_AK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AElementwiseOperation"], [0, 1, 1, 
"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AGridDesc_AK0_M_AK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AK1Value"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AThreadTransferSrcResetCoordinateAfterRun"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", 
"ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::AccElementwiseOperation"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockLdsExtraN"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferDstScalarPerVector_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferSrcAccessOrder"], [0, 1, 1, 
"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferSrcScalarPerVector"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferSrcVectorDim"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferThreadClusterArrangeOrder"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", 
"ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1BlockTransferThreadClusterLengths_BK0_N_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1ElementwiseOperation"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1GridDesc_BK0_N_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1K1Value"], [0, 1, 1, 
"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::B1ThreadTransferSrcResetCoordinateAfterRun"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockLdsExtraN"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferDstScalarPerVector_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", 
"ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferSrcAccessOrder"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferSrcScalarPerVector"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferSrcVectorDim"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferThreadClusterArrangeOrder"], [0, 1, 1, 
"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BBlockTransferThreadClusterLengths_BK0_N_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BElementwiseOperation"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BGridDesc_BK0_N_BK1"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", 
"ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BK1Value"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BThreadTransferSrcResetCoordinateAfterRun"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::BlockSize"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CElementwiseOperation"], [0, 1, 1, 
"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CGlobalMemoryDataOperation"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CGridDesc_M_N"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleBlockTransferClusterLengths_MBlock_MPerBlock_NBlock_NPerBlock"], [0, 1, 1, 
"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleBlockTransferScalarPerVector_NPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleMXdlPerWavePerShuffle"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::CShuffleNXdlPerWavePerShuffle"], [0, 1, 1, 
"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatAB"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatC"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatCShuffle"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::FloatGemmAcc"], [0, 1, 
1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::Gemm1KPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::Gemm1NPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::Gemm1NXdlPerWave"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", 
"ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::KPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::LoopSched"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MPerXdl"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", 
"ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MXdlPerWave"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::MaskOutUpperTriangle"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NPerBlock"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NPerXdl"], [0, 1, 1, 
"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NXdlPerWave"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::NumGemmKPrefetchStage"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", "ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::PadN"], [0, 1, 1, "_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE", 
"ck::GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffle::PipelineVer"], [0, 0, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::BlockSliceLengths"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstData"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstDesc"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstDimAccessOrder"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstElementwiseOperation"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstInMemOp"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstScalarPerVector"], [0, 1, 1, 
"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstScalarStrideInVector"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::DstVectorDim"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::NumThreadScratch"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcData"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcDesc"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcDimAccessOrder"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcElementwiseOperation"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcScalarPerVector"], [0, 1, 1, 
"_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcScalarStrideInVector"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::SrcVectorDim"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadClusterArrangeOrder"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadClusterLengths"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadGroup"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadTransferDstResetCoordinateAfterRun"], [0, 1, 1, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E", "ck::ThreadGroupTensorSliceTransfer_v4r1::ThreadTransferSrcResetCoordinateAfterRun"], [0, 0, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic"], [0, 1, 1, 
"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DimAccessOrder"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstData"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstDesc"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstScalarPerVector"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::DstVectorDim"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::ElementwiseOperation"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::SliceLengths"], [0, 1, 1, 
"_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::SrcData"], [0, 1, 1, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE", "ck::ThreadwiseTensorSliceTransfer_StaticToStatic::SrcDesc"]]}, "objtypes": {"0": "cpp:class", "1": "cpp:templateParam"}, "objnames": {"0": ["cpp", "class", "C++ class"], "1": ["cpp", "templateParam", "C++ template parameter"]}, "titleterms": {"api": 0, "refer": 0, "guid": [0, 1, 3, 4, 6], "introduct": [0, 3], "us": 0, "ck": [0, 5, 7], "datatyp": 0, "devicemem": 0, "kernel": [0, 5, 6], "For": 0, "flashattent": 0, "contributor": 1, "": [1, 2], "pull": 1, "request": 1, "guidelin": 1, "disclaim": 2, "amd": 2, "standard": 2, "legal": 2, "third": 2, "parti": 2, "get": 3, "start": [3, 5], "document": 3, "roadmap": 3, "support": 4, "primit": 4, "softmax": 4, "docker": 5, "hub": 5, "why": 5, "do": 5, "i": 5, "need": 5, "thi": 5, "so": 5, "what": 5, "compos": [5, 6], "And": 5, "insid": 5, "which": 5, "imag": 5, "right": 5, "me": 5, "dii": 5, "here": 5, "licens": 5, "user": 6, "content": 6, "hello": 7, "world": 7, "motiv": 7, "descript": 7, "hardwar": 7, "target": 7, "build": 7, "librari": 7, "run": 7, "exampl": 7, "test": 7, "summari": 7}, "envversion": {"sphinx.domains.c": 2, "sphinx.domains.changeset": 1, "sphinx.domains.citation": 1, "sphinx.domains.cpp": 8, "sphinx.domains.index": 1, "sphinx.domains.javascript": 2, "sphinx.domains.math": 2, "sphinx.domains.python": 3, "sphinx.domains.rst": 2, "sphinx.domains.std": 2, "sphinxcontrib.bibtex": 9, "sphinx": 57}, "alltitles": {"API Reference Guide": [[0, "api-reference-guide"]], "Introduction": [[0, "introduction"], [3, "introduction"]], "Using CK API": [[0, "using-ck-api"]], "CK Datatypes": 
[[0, "ck-datatypes"]], "DeviceMem": [[0, "devicemem"]], "Kernels For Flashattention": [[0, "kernels-for-flashattention"]], "Contributor\u2019s Guide": [[1, "contributor-s-guide"]], "Pull-request guidelines": [[1, "pull-request-guidelines"]], "Disclaimer": [[2, "disclaimer"]], "AMD\u2019s standard legal Disclaimer": [[2, "amd-s-standard-legal-disclaimer"]], "Third Party Disclaimer": [[2, "third-party-disclaimer"]], "Getting Started Guide": [[3, "getting-started-guide"]], "Documentation Roadmap": [[3, "documentation-roadmap"]], "Supported Primitives Guide": [[4, "supported-primitives-guide"]], "Softmax": [[4, "softmax"]], "CK docker hub": [[5, "ck-docker-hub"]], "Why do I need this?": [[5, "why-do-i-need-this"]], "So what is Composable Kernel?": [[5, "so-what-is-composable-kernel"]], "And what is inside?": [[5, "and-what-is-inside"]], "Which image is right for me?": [[5, "which-image-is-right-for-me"]], "DIY starts here": [[5, "diy-starts-here"]], "License": [[5, "license"]], "Composable Kernel User Guide": [[6, "composable-kernel-user-guide"]], "Contents:": [[6, null]], "CK Hello world": [[7, "ck-hello-world"]], "Motivation": [[7, "motivation"]], "Description": [[7, "description"]], "Hardware targets": [[7, "hardware-targets"]], "Build the library": [[7, "build-the-library"]], "Run examples and tests": [[7, "run-examples-and-tests"]], "Summary": [[7, "summary"]]}, "indexentries": {"devicemem (c++ struct)": [[0, "_CPPv49DeviceMem"]], "ck::blockwisegemmxdlops_v2 (c++ struct)": [[0, "_CPPv4I_7index_t000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_7index_t_7index_tEN2ck22BlockwiseGemmXdlops_v2E"]], "ck::blockwisesoftmax (c++ struct)": [[0, "_CPPv4I_7index_t0000_bEN2ck16BlockwiseSoftmaxE"]], "ck::gridwisebatchedgemmsoftmaxgemm_xdl_cshuffle (c++ struct)": [[0, 
"_CPPv4I000000000_25InMemoryDataOperationEnum0000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t000_7index_t_7index_t_7index_t_b_7index_t_7index_t_7index_t0_7index_t_13LoopScheduler_b_b_15PipelineVersionEN2ck43GridwiseBatchedGemmSoftmaxGemm_Xdl_CShuffleE"]], "ck::threadgrouptensorslicetransfer_v4r1 (c++ struct)": [[0, "_CPPv4I000_25InMemoryDataOperationEnum000000000_7index_t_7index_t_7index_t_7index_t_7index_t_7index_t_b_b_7index_tEN2ck35ThreadGroupTensorSliceTransfer_v4r1E"]], "ck::threadwisetensorslicetransfer_statictostatic (c++ struct)": [[0, "_CPPv4I0000000_7index_t_7index_t_N9enable_ifIXaaclN7SrcDesc20IsKnownAtCompileTimeEEclN7DstDesc20IsKnownAtCompileTimeEEEbE4typeEEN2ck44ThreadwiseTensorSliceTransfer_StaticToStaticE"]]}})
\ No newline at end of file
<!DOCTYPE html>
<html class="writer-html5" lang="en" >
<head>
<meta charset="utf-8" /><meta name="generator" content="Docutils 0.18.1: http://docutils.sourceforge.net/" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>2. CK Hello world &mdash; Composable Kernel (CK) documentation</title>
<link rel="stylesheet" href="_static/pygments.css" type="text/css" />
<link rel="stylesheet" href="_static/css/theme.css" type="text/css" />
<!--[if lt IE 9]>
<script src="_static/js/html5shiv.min.js"></script>
<![endif]-->
<script data-url_root="./" id="documentation_options" src="_static/documentation_options.js"></script>
<script src="_static/doctools.js"></script>
<script src="_static/sphinx_highlight.js"></script>
<script src="_static/js/theme.js"></script>
<link rel="index" title="Index" href="genindex.html" />
<link rel="search" title="Search" href="search.html" />
<link rel="next" title="3. CK docker hub" href="dockerhub.html" />
<link rel="prev" title="1. Getting Started Guide" href="Linux_Install_Guide.html" />
</head>
<body class="wy-body-for-nav">
<div class="wy-grid-for-nav">
<nav data-toggle="wy-nav-shift" class="wy-nav-side">
<div class="wy-side-scroll">
<div class="wy-side-nav-search" >
<a href="index.html">
<img src="_static/rocm_logo.png" class="logo" alt="Logo"/>
</a>
<div role="search">
<form id="rtd-search-form" class="wy-form" action="search.html" method="get">
<input type="text" name="q" placeholder="Search docs" aria-label="Search docs" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
</div><div class="wy-menu wy-menu-vertical" data-spy="affix" role="navigation" aria-label="Navigation menu">
<p class="caption" role="heading"><span class="caption-text">Contents:</span></p>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="Linux_Install_Guide.html">1. Getting Started Guide</a></li>
<li class="toctree-l1 current"><a class="current reference internal" href="#">2. CK Hello world</a><ul>
<li class="toctree-l2"><a class="reference internal" href="#motivation">2.1. Motivation</a></li>
<li class="toctree-l2"><a class="reference internal" href="#description">2.2. Description</a></li>
<li class="toctree-l2"><a class="reference internal" href="#hardware-targets">2.3. Hardware targets</a></li>
<li class="toctree-l2"><a class="reference internal" href="#build-the-library">2.4. Build the library</a></li>
<li class="toctree-l2"><a class="reference internal" href="#run-examples-and-tests">2.5. Run examples and tests</a></li>
<li class="toctree-l2"><a class="reference internal" href="#summary">2.6. Summary</a></li>
</ul>
</li>
<li class="toctree-l1"><a class="reference internal" href="dockerhub.html">3. CK docker hub</a></li>
<li class="toctree-l1"><a class="reference internal" href="Supported_Primitives_Guide.html">4. Supported Primitives Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="API_Reference_Guide.html">5. API Reference Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="Contributors_Guide.html">6. Contributor’s Guide</a></li>
<li class="toctree-l1"><a class="reference internal" href="Disclaimer.html">7. Disclaimer</a></li>
</ul>
</div>
</div>
</nav>
<section data-toggle="wy-nav-shift" class="wy-nav-content-wrap"><nav class="wy-nav-top" aria-label="Mobile navigation menu" >
<i data-toggle="wy-nav-top" class="fa fa-bars"></i>
<a href="index.html">Composable Kernel (CK)</a>
</nav>
<div class="wy-nav-content">
<div class="rst-content">
<div role="navigation" aria-label="Page navigation">
<ul class="wy-breadcrumbs">
<li><a href="index.html" class="icon icon-home" aria-label="Home"></a></li>
<li class="breadcrumb-item active"><span class="section-number">2. </span>CK Hello world</li>
<li class="wy-breadcrumbs-aside">
<a href="_sources/tutorial_hello_world.rst.txt" rel="nofollow"> View page source</a>
</li>
</ul>
<hr/>
</div>
<div role="main" class="document" itemscope="itemscope" itemtype="http://schema.org/Article">
<div itemprop="articleBody">
<section id="ck-hello-world">
<h1><span class="section-number">2. </span>CK Hello world<a class="headerlink" href="#ck-hello-world" title="Permalink to this heading"></a></h1>
<section id="motivation">
<h2><span class="section-number">2.1. </span>Motivation<a class="headerlink" href="#motivation" title="Permalink to this heading"></a></h2>
<p>This tutorial is aimed at engineers working with artificial intelligence and machine learning who would like to optimize their pipelines and squeeze out every drop of performance by adding the Composable Kernel (CK) library to their projects. We would like to make the CK library approachable, so the tutorial is not based on the latest release and doesn’t have all the bleeding-edge features, but it will be reproducible now and forever.</p>
<p>During this tutorial we will have an introduction to the CK library, we will build it and run some examples and tests, so to say we will run a “Hello world” example. In future tutorials we will go in depth and breadth and get familiar with other tools and ways to integrate CK into your project.</p>
</section>
<section id="description">
<h2><span class="section-number">2.2. </span>Description<a class="headerlink" href="#description" title="Permalink to this heading"></a></h2>
<p>Modern AI technology solves more and more problems in all imaginable fields, but crafting fast and efficient workflows is still challenging. CK is one of the tools to make AI heavy lifting as fast and efficient as possible. CK is a collection of optimized AI operator kernels and tools to create new ones. The library has the components required for the majority of modern neural network architectures, including matrix multiplication, convolution, contraction, reduction, attention modules, a variety of activation functions, fused operators and many more.</p>
<p>So how do we (almost) reach the speed of light? CK acceleration abilities are based on:</p>
<ul class="simple">
<li><p>Layered structure.</p></li>
<li><p>Tile-based computation model.</p></li>
<li><p>Tensor coordinate transformation.</p></li>
<li><p>Hardware acceleration use.</p></li>
<li><p>Support of low precision data types including fp16, bf16, int8 and int4.</p></li>
</ul>
<p>If you are excited and need more technical details and benchmarking results - read this awesome <a class="reference external" href="https://community.amd.com/t5/instinct-accelerators/amd-composable-kernel-library-efficient-fused-kernels-for-ai/ba-p/553224">blog post</a>.</p>
<p>For more details visit our <a class="reference external" href="https://github.com/ROCmSoftwarePlatform/composable_kernel">github repo</a>.</p>
</section>
<section id="hardware-targets">
<h2><span class="section-number">2.3. </span>Hardware targets<a class="headerlink" href="#hardware-targets" title="Permalink to this heading"></a></h2>
<p>The CK library fully supports the “gfx908” and “gfx90a” GPU architectures, and only some operators are supported for “gfx1030”. Let’s check the hardware you have at hand and decide on the target GPU architecture:</p>
<table class="docutils align-default">
<thead>
<tr class="row-odd"><th class="head"><p>GPU Target</p></th>
<th class="head"><p>AMD GPU</p></th>
</tr>
</thead>
<tbody>
<tr class="row-even"><td><p>gfx908</p></td>
<td><p>Radeon Instinct MI100</p></td>
</tr>
<tr class="row-odd"><td><p>gfx90a</p></td>
<td><p>Radeon Instinct MI210, MI250, MI250X</p></td>
</tr>
<tr class="row-even"><td><p>gfx1030</p></td>
<td><p>Radeon PRO V620, W6800, W6800X, W6800X Duo, W6900X, RX 6800, RX 6800 XT, RX 6900 XT, RX 6900 XTX, RX 6950 XT</p></td>
</tr>
</tbody>
</table>
<p>There are also <a class="reference external" href="https://aws.amazon.com/ec2/instance-types/g4/">cloud options</a> you can find if you don’t have an AMD GPU at hand.</p>
</section>
<section id="build-the-library">
<h2><span class="section-number">2.4. </span>Build the library<a class="headerlink" href="#build-the-library" title="Permalink to this heading"></a></h2>
<p>First let’s clone the library and check out the tested version:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">git</span> <span class="n">clone</span> <span class="n">https</span><span class="p">:</span><span class="o">//</span><span class="n">github</span><span class="o">.</span><span class="n">com</span><span class="o">/</span><span class="n">ROCmSoftwarePlatform</span><span class="o">/</span><span class="n">composable_kernel</span><span class="o">.</span><span class="n">git</span>
<span class="n">cd</span> <span class="n">composable_kernel</span><span class="o">/</span>
<span class="n">git</span> <span class="n">checkout</span> <span class="n">tutorial_hello_world</span>
</pre></div>
</div>
<p>To make our lives easier we prepared <a class="reference external" href="https://hub.docker.com/r/rocm/composable_kernel">docker images</a> with all the necessary dependencies. Pick the right image and create a container. In this tutorial we use the “rocm/composable_kernel:ck_ub20.04_rocm5.3_release” image, which is based on Ubuntu 20.04, ROCm v5.3, and the compiler release version.</p>
<p>If your current folder is ${HOME}, start the docker container with:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span>docker run \
-it \
--privileged \
--group-add sudo \
-w /root/workspace \
-v ${HOME}:/root/workspace \
rocm/composable_kernel:ck_ub20.04_rocm5.3_release \
/bin/bash
</pre></div>
</div>
<p>If your current folder is different from ${HOME}, adjust the line <cite>-v ${HOME}:/root/workspace</cite> to fit your folder structure.</p>
<p>Inside the docker container the current folder is “~/workspace” and the library path is “~/workspace/composable_kernel”. Navigate to the library:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">cd</span> <span class="n">composable_kernel</span><span class="o">/</span>
</pre></div>
</div>
<p>Create and go to the “build” directory:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">mkdir</span> <span class="n">build</span> <span class="o">&amp;&amp;</span> <span class="n">cd</span> <span class="n">build</span>
</pre></div>
</div>
<p>In the previous section we talked about target GPU architecture. Once you decide which one is right for you, run cmake using the right GPU_TARGETS flag:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">cmake</span> \
<span class="o">-</span><span class="n">D</span> <span class="n">CMAKE_PREFIX_PATH</span><span class="o">=/</span><span class="n">opt</span><span class="o">/</span><span class="n">rocm</span> \
<span class="o">-</span><span class="n">D</span> <span class="n">CMAKE_CXX_COMPILER</span><span class="o">=/</span><span class="n">opt</span><span class="o">/</span><span class="n">rocm</span><span class="o">/</span><span class="nb">bin</span><span class="o">/</span><span class="n">hipcc</span> \
<span class="o">-</span><span class="n">D</span> <span class="n">CMAKE_CXX_FLAGS</span><span class="o">=</span><span class="s2">&quot;-O3&quot;</span> \
<span class="o">-</span><span class="n">D</span> <span class="n">CMAKE_BUILD_TYPE</span><span class="o">=</span><span class="n">Release</span> \
<span class="o">-</span><span class="n">D</span> <span class="n">BUILD_DEV</span><span class="o">=</span><span class="n">OFF</span> \
<span class="o">-</span><span class="n">D</span> <span class="n">GPU_TARGETS</span><span class="o">=</span><span class="s2">&quot;gfx908;gfx90a;gfx1030&quot;</span> <span class="o">..</span>
</pre></div>
</div>
<p>If everything went well the cmake run will end up with:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">--</span> <span class="n">Configuring</span> <span class="n">done</span>
<span class="o">--</span> <span class="n">Generating</span> <span class="n">done</span>
<span class="o">--</span> <span class="n">Build</span> <span class="n">files</span> <span class="n">have</span> <span class="n">been</span> <span class="n">written</span> <span class="n">to</span><span class="p">:</span> <span class="s2">&quot;/root/workspace/composable_kernel/build&quot;</span>
</pre></div>
</div>
<p>Finally, we can build examples and tests:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">make</span> <span class="o">-</span><span class="n">j</span> <span class="n">examples</span> <span class="n">tests</span>
</pre></div>
</div>
<p>If everything is smooth, you’ll see:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Scanning</span> <span class="n">dependencies</span> <span class="n">of</span> <span class="n">target</span> <span class="n">tests</span>
<span class="p">[</span><span class="mi">100</span><span class="o">%</span><span class="p">]</span> <span class="n">Built</span> <span class="n">target</span> <span class="n">tests</span>
</pre></div>
</div>
</section>
<section id="run-examples-and-tests">
<h2><span class="section-number">2.5. </span>Run examples and tests<a class="headerlink" href="#run-examples-and-tests" title="Permalink to this heading"></a></h2>
<p>Examples are listed as test cases as well, so we can run all examples and tests with:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">ctest</span>
</pre></div>
</div>
<p>You can check the list of all tests by running:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">ctest</span> <span class="o">-</span><span class="n">N</span>
</pre></div>
</div>
<p>We can also run them separately, here is a separate example execution:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">./</span><span class="nb">bin</span><span class="o">/</span><span class="n">example_gemm_xdl_fp16</span> <span class="mi">1</span> <span class="mi">1</span> <span class="mi">1</span>
</pre></div>
</div>
<p>The arguments “1 1 1” mean that we want to run this example in the mode: verify results with CPU, initialize matrices with integers and benchmark the kernel execution. You can play around with these parameters and see how output and execution results change.</p>
<p>If everything goes well and you have a device based on the gfx908 or gfx90a architecture, you should see something like:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">a_m_k</span><span class="p">:</span> <span class="n">dim</span> <span class="mi">2</span><span class="p">,</span> <span class="n">lengths</span> <span class="p">{</span><span class="mi">3840</span><span class="p">,</span> <span class="mi">4096</span><span class="p">},</span> <span class="n">strides</span> <span class="p">{</span><span class="mi">4096</span><span class="p">,</span> <span class="mi">1</span><span class="p">}</span>
<span class="n">b_k_n</span><span class="p">:</span> <span class="n">dim</span> <span class="mi">2</span><span class="p">,</span> <span class="n">lengths</span> <span class="p">{</span><span class="mi">4096</span><span class="p">,</span> <span class="mi">4096</span><span class="p">},</span> <span class="n">strides</span> <span class="p">{</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4096</span><span class="p">}</span>
<span class="n">c_m_n</span><span class="p">:</span> <span class="n">dim</span> <span class="mi">2</span><span class="p">,</span> <span class="n">lengths</span> <span class="p">{</span><span class="mi">3840</span><span class="p">,</span> <span class="mi">4096</span><span class="p">},</span> <span class="n">strides</span> <span class="p">{</span><span class="mi">4096</span><span class="p">,</span> <span class="mi">1</span><span class="p">}</span>
<span class="n">launch_and_time_kernel</span><span class="p">:</span> <span class="n">grid_dim</span> <span class="p">{</span><span class="mi">480</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">},</span> <span class="n">block_dim</span> <span class="p">{</span><span class="mi">256</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">}</span>
<span class="n">Warm</span> <span class="n">up</span> <span class="mi">1</span> <span class="n">time</span>
<span class="n">Start</span> <span class="n">running</span> <span class="mi">10</span> <span class="n">times</span><span class="o">...</span>
<span class="n">Perf</span><span class="p">:</span> <span class="mf">1.10017</span> <span class="n">ms</span><span class="p">,</span> <span class="mf">117.117</span> <span class="n">TFlops</span><span class="p">,</span> <span class="mf">87.6854</span> <span class="n">GB</span><span class="o">/</span><span class="n">s</span><span class="p">,</span> <span class="n">DeviceGemmXdl</span><span class="o">&lt;</span><span class="mi">256</span><span class="p">,</span> <span class="mi">256</span><span class="p">,</span> <span class="mi">128</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">32</span><span class="p">,</span> <span class="mi">32</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="o">&gt;</span> <span class="n">NumPrefetch</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="n">LoopScheduler</span><span class="p">:</span> <span class="n">Default</span><span class="p">,</span> <span class="n">PipelineVersion</span><span class="p">:</span> <span class="n">v1</span>
</pre></div>
</div>
<p>Meanwhile, running it on a gfx1030 device should result in:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">a_m_k</span><span class="p">:</span> <span class="n">dim</span> <span class="mi">2</span><span class="p">,</span> <span class="n">lengths</span> <span class="p">{</span><span class="mi">3840</span><span class="p">,</span> <span class="mi">4096</span><span class="p">},</span> <span class="n">strides</span> <span class="p">{</span><span class="mi">4096</span><span class="p">,</span> <span class="mi">1</span><span class="p">}</span>
<span class="n">b_k_n</span><span class="p">:</span> <span class="n">dim</span> <span class="mi">2</span><span class="p">,</span> <span class="n">lengths</span> <span class="p">{</span><span class="mi">4096</span><span class="p">,</span> <span class="mi">4096</span><span class="p">},</span> <span class="n">strides</span> <span class="p">{</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4096</span><span class="p">}</span>
<span class="n">c_m_n</span><span class="p">:</span> <span class="n">dim</span> <span class="mi">2</span><span class="p">,</span> <span class="n">lengths</span> <span class="p">{</span><span class="mi">3840</span><span class="p">,</span> <span class="mi">4096</span><span class="p">},</span> <span class="n">strides</span> <span class="p">{</span><span class="mi">4096</span><span class="p">,</span> <span class="mi">1</span><span class="p">}</span>
<span class="n">DeviceGemmXdl</span><span class="o">&lt;</span><span class="mi">256</span><span class="p">,</span> <span class="mi">256</span><span class="p">,</span> <span class="mi">128</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">8</span><span class="p">,</span> <span class="mi">32</span><span class="p">,</span> <span class="mi">32</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">2</span><span class="o">&gt;</span> <span class="n">NumPrefetch</span><span class="p">:</span> <span class="mi">1</span><span class="p">,</span> <span class="n">LoopScheduler</span><span class="p">:</span> <span class="n">Default</span><span class="p">,</span> <span class="n">PipelineVersion</span><span class="p">:</span> <span class="n">v1</span> <span class="n">does</span> <span class="ow">not</span> <span class="n">support</span> <span class="n">this</span> <span class="n">problem</span>
</pre></div>
</div>
<p>But don’t panic, some of the operators are supported on gfx1030 architecture, so you can run a separate example like:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="o">./</span><span class="nb">bin</span><span class="o">/</span><span class="n">example_gemm_dl_fp16</span> <span class="mi">1</span> <span class="mi">1</span> <span class="mi">1</span>
</pre></div>
</div>
<p>and it should result in something nice similar to:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">a_m_k</span><span class="p">:</span> <span class="n">dim</span> <span class="mi">2</span><span class="p">,</span> <span class="n">lengths</span> <span class="p">{</span><span class="mi">3840</span><span class="p">,</span> <span class="mi">4096</span><span class="p">},</span> <span class="n">strides</span> <span class="p">{</span><span class="mi">1</span><span class="p">,</span> <span class="mi">4096</span><span class="p">}</span>
<span class="n">b_k_n</span><span class="p">:</span> <span class="n">dim</span> <span class="mi">2</span><span class="p">,</span> <span class="n">lengths</span> <span class="p">{</span><span class="mi">4096</span><span class="p">,</span> <span class="mi">4096</span><span class="p">},</span> <span class="n">strides</span> <span class="p">{</span><span class="mi">4096</span><span class="p">,</span> <span class="mi">1</span><span class="p">}</span>
<span class="n">c_m_n</span><span class="p">:</span> <span class="n">dim</span> <span class="mi">2</span><span class="p">,</span> <span class="n">lengths</span> <span class="p">{</span><span class="mi">3840</span><span class="p">,</span> <span class="mi">4096</span><span class="p">},</span> <span class="n">strides</span> <span class="p">{</span><span class="mi">4096</span><span class="p">,</span> <span class="mi">1</span><span class="p">}</span>
<span class="n">arg</span><span class="o">.</span><span class="n">a_grid_desc_k0_m0_m1_k1_</span><span class="p">{</span><span class="mi">2048</span><span class="p">,</span> <span class="mi">3840</span><span class="p">,</span> <span class="mi">2</span><span class="p">}</span>
<span class="n">arg</span><span class="o">.</span><span class="n">b_grid_desc_k0_n0_n1_k1_</span><span class="p">{</span><span class="mi">2048</span><span class="p">,</span> <span class="mi">4096</span><span class="p">,</span> <span class="mi">2</span><span class="p">}</span>
<span class="n">arg</span><span class="o">.</span><span class="n">c_grid_desc_m_n_</span><span class="p">{</span> <span class="mi">3840</span><span class="p">,</span> <span class="mi">4096</span><span class="p">}</span>
<span class="n">launch_and_time_kernel</span><span class="p">:</span> <span class="n">grid_dim</span> <span class="p">{</span><span class="mi">960</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">},</span> <span class="n">block_dim</span> <span class="p">{</span><span class="mi">256</span><span class="p">,</span> <span class="mi">1</span><span class="p">,</span> <span class="mi">1</span><span class="p">}</span>
<span class="n">Warm</span> <span class="n">up</span> <span class="mi">1</span> <span class="n">time</span>
<span class="n">Start</span> <span class="n">running</span> <span class="mi">10</span> <span class="n">times</span><span class="o">...</span>
<span class="n">Perf</span><span class="p">:</span> <span class="mf">3.65695</span> <span class="n">ms</span><span class="p">,</span> <span class="mf">35.234</span> <span class="n">TFlops</span><span class="p">,</span> <span class="mf">26.3797</span> <span class="n">GB</span><span class="o">/</span><span class="n">s</span><span class="p">,</span> <span class="n">DeviceGemmDl</span><span class="o">&lt;</span><span class="mi">256</span><span class="p">,</span> <span class="mi">128</span><span class="p">,</span> <span class="mi">128</span><span class="p">,</span> <span class="mi">16</span><span class="p">,</span> <span class="mi">2</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">4</span><span class="p">,</span> <span class="mi">1</span><span class="o">&gt;</span>
</pre></div>
</div>
<p>Or we can run a separate test:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">ctest</span> <span class="o">-</span><span class="n">R</span> <span class="n">test_gemm_fp16</span>
</pre></div>
</div>
<p>If everything goes well you should see something like:</p>
<div class="highlight-default notranslate"><div class="highlight"><pre><span></span><span class="n">Start</span> <span class="mi">121</span><span class="p">:</span> <span class="n">test_gemm_fp16</span>
<span class="mi">1</span><span class="o">/</span><span class="mi">1</span> <span class="n">Test</span> <span class="c1">#121: test_gemm_fp16 ................... Passed 51.81 sec</span>
<span class="mi">100</span><span class="o">%</span> <span class="n">tests</span> <span class="n">passed</span><span class="p">,</span> <span class="mi">0</span> <span class="n">tests</span> <span class="n">failed</span> <span class="n">out</span> <span class="n">of</span> <span class="mi">1</span>
</pre></div>
</div>
</section>
<section id="summary">
<h2><span class="section-number">2.6. </span>Summary<a class="headerlink" href="#summary" title="Permalink to this heading"></a></h2>
<p>In this tutorial we took a first look at the Composable Kernel library, built it on your system and ran some examples and tests. Stay tuned: in the next tutorial we will run kernels with different configs to find out the best one for your hardware and task.</p>
<p>P.S.: Don’t forget to shut down the cloud instance if you have launched one; you can surely find better ways to spend your money!</p>
</section>
</section>
</div>
</div>
<footer><div class="rst-footer-buttons" role="navigation" aria-label="Footer">
<a href="Linux_Install_Guide.html" class="btn btn-neutral float-left" title="1. Getting Started Guide" accesskey="p" rel="prev"><span class="fa fa-arrow-circle-left" aria-hidden="true"></span> Previous</a>
<a href="dockerhub.html" class="btn btn-neutral float-right" title="3. CK docker hub" accesskey="n" rel="next">Next <span class="fa fa-arrow-circle-right" aria-hidden="true"></span></a>
</div>
<hr/>
<div role="contentinfo">
<p>&#169; Copyright 2018-2023, Advanced Micro Devices.</p>
</div>
Built with <a href="https://www.sphinx-doc.org/">Sphinx</a> using a
<a href="https://github.com/readthedocs/sphinx_rtd_theme">theme</a>
provided by <a href="https://readthedocs.org">Read the Docs</a>.
</footer>
</div>
</div>
</section>
</div>
<script>
jQuery(function () {
SphinxRtdTheme.Navigation.enable(true);
});
</script>
</body>
</html>
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment