for path in"$KNOWLEDGE_SRC/SKILL.md""$NCUREPORT_SRC/SKILL.md""$AGENTLOOP_SRC/SKILL.md";do
for path in"$KNOWLEDGE_SRC/SKILL.md""$NCUREPORT_SRC/SKILL.md""$AGENTLOOP_SRC/SKILL.md""$TRITON_AGENT_SRC/SKILL.md""$TRITON_KNOWLEDGE_SRC/SKILL.md""$TRITON_PROFILER_SRC/SKILL.md";do
"description":"Complementary kernel knowledge map for Humanize-driven GPU kernel optimization. Lists code and knowledge repositories that have no curated PR diffs in the local Route A corpus (NVIDIA developer samples, Colfax research kernels, simveit micro-tutorials, Hygon/DCU optimization references); each framework entry points to upstream repos, kernel directories, and source guides. Topic entries map kernel topics to per-framework references for live clone/grep workflows. Frameworks already covered by Route A PR bundles (SGLang, vLLM, TensorRT-LLM, PyTorch, FlashAttention, FlashInfer, CUTLASS/CuTe, CCCL, Triton, DeepGEMM, ThunderKittens, TileLang, QuACK, DeepSeek TileKernels) are intentionally excluded.",
"description":"Complementary kernel knowledge map for Humanize-driven GPU kernel optimization. Lists code and knowledge repositories that have no curated PR diffs in the local Route A corpus (NVIDIA developer samples, Colfax research kernels, simveit micro-tutorials, Hygon/DCU optimization references, and open Triton kernel libraries such as AITER, AOTriton, Conch, FlagGems, Liger Kernel, Hugging Face kernels, and Triton-distributed); each framework entry points to upstream repos, kernel directories, and source guides. Topic entries map kernel topics to per-framework references for live clone/grep workflows. Frameworks already covered by Route A PR bundles (SGLang, vLLM, TensorRT-LLM, PyTorch, FlashAttention, FlashInfer, CUTLASS/CuTe, CCCL, Triton, DeepGEMM, ThunderKittens, TileLang, QuACK, DeepSeek TileKernels) are intentionally excluded.",
"frameworks":[
"frameworks":[
{
{
"id":"nvidia-code-samples",
"id":"nvidia-code-samples",
...
@@ -150,6 +150,196 @@
...
@@ -150,6 +150,196 @@
"dccobjdump",
"dccobjdump",
"sqtt"
"sqtt"
]
]
},
{
"id":"rocm-aiter",
"name":"AITER AI Tensor Engine for ROCm",
"repo":"ROCm/aiter",
"url":"https://github.com/ROCm/aiter",
"kernel_paths":[
"aiter",
"op_tests",
"docs",
"gradlib",
"csrc",
"requirements-triton-comms.txt",
".github/scripts/install_triton.sh",
"README.md"
],
"tags":[
"triton",
"rocm",
"aiter",
"attention",
"mla",
"paged-attention",
"fused-moe",
"gemm",
"rmsnorm",
"quantization",
"communication"
]
},
{
"id":"rocm-aotriton",
"name":"AOTriton Ahead-of-Time Triton Math Library",
"repo":"ROCm/aotriton",
"url":"https://github.com/ROCm/aotriton",
"kernel_paths":[
"v2python",
"v2src",
"v3python",
"v3src",
"tritonsrc",
"include/aotriton",
"test",
"docs",
"README.md"
],
"tags":[
"triton",
"rocm",
"aot",
"aotriton",
"flash-attention",
"sdpa",
"attention",
"compiler",
"codegen"
]
},
{
"id":"stackav-conch",
"name":"Conch Triton Kernel Standard Library",
"repo":"stackav-oss/conch",
"url":"https://github.com/stackav-oss/conch",
"kernel_paths":[
"conch",
"tests",
"benchmarks",
"README.md",
"pyproject.toml"
],
"tags":[
"triton",
"rocm",
"standard-library",
"paged-attention",
"varlen-attention",
"rmsnorm",
"rotary",
"kv-cache",
"fp8",
"int8",
"quantization",
"vllm"
]
},
{
"id":"flaggems",
"name":"FlagGems Triton Operator Library",
"repo":"flagos-ai/FlagGems",
"url":"https://github.com/flagos-ai/FlagGems",
"kernel_paths":[
"src/flag_gems",
"benchmark",
"tests",
"modules_tests",
"experimental_tests",
"triton_src",
"docs",
"README.md"
],
"tags":[
"triton",
"pytorch",
"operator-library",
"llm",
"backend-neutral",
"multi-backend",
"aten",
"normalization",
"reduction",
"elementwise",
"quantization"
]
},
{
"id":"liger-kernel",
"name":"Liger Kernel Triton Kernels for LLM Training",
"repo":"linkedin/Liger-Kernel",
"url":"https://github.com/linkedin/Liger-Kernel",
"kernel_paths":[
"src/liger_kernel",
"test",
"benchmark",
"examples",
"docs",
"README.md"
],
"tags":[
"triton",
"llm-training",
"rmsnorm",
"rope",
"swiglu",
"cross-entropy",
"fused-linear-cross-entropy",
"loss",
"amd"
]
},
{
"id":"huggingface-kernels",
"name":"Hugging Face Kernels and kernels-community Hub",