---
# Version of this metadata file's schema.
schema_version: 1
# Folded block scalar (>-): the lines below are joined with single spaces and
# the trailing newline is stripped, so the parsed value is the same one-line
# string as the previous plain scalar that continued onto a second line.
description: >-
  Framework metadata for repositories covered by the local PR diff corpus
  (Route A). These entries are intentionally NOT in index.json, which scopes
  Route B to complementary code repositories. Used by
  scripts/expand-pr-corpus.py to refresh the PR pipeline.
# One entry per framework. Every entry carries the same six keys:
# id, name, repo, url, kernel_paths, tags.
frameworks:
# SGLang (repo sgl-project/sglang).
# kernel_paths covers entries under python/sglang/srt/layers and the
# sgl-kernel tree.
# NOTE(review): sgl-kernel/csrc is listed together with five of its own
# subdirectories; if the consumer matches paths by prefix the subdirectory
# entries are redundant — confirm before pruning.
- id: sglang
  name: SGLang
  repo: sgl-project/sglang
  url: https://github.com/sgl-project/sglang
  kernel_paths:
  - python/sglang/srt/layers/attention
  - python/sglang/srt/layers/moe
  - python/sglang/srt/layers/quantization
  # Single file rather than a directory.
  - python/sglang/srt/layers/sampler.py
  - sgl-kernel/csrc
  - sgl-kernel/csrc/attention
  - sgl-kernel/csrc/moe
  - sgl-kernel/csrc/elementwise
  - sgl-kernel/csrc/gemm
  - sgl-kernel/csrc/spec_decode
  - sgl-kernel/python/sgl_kernel
  tags:
  - llm-serving
  - attention
  - moe
  - fp8
  - speculative-decoding
  - sampling
  - cuda
  - triton
# vLLM (repo vllm-project/vllm).
# kernel_paths covers the csrc tree plus two paths inside the vllm package.
# NOTE(review): csrc is listed together with four of its own subdirectories;
# presumably redundant under prefix matching — confirm against the consumer.
- id: vllm
  name: vLLM
  repo: vllm-project/vllm
  url: https://github.com/vllm-project/vllm
  kernel_paths:
  - csrc
  - csrc/attention
  - csrc/moe
  - csrc/quantization
  - csrc/cutlass_extensions
  - vllm/attention
  - vllm/model_executor/layers
  tags:
  - llm-serving
  - paged-attention
  - moe
  - awq
  - gptq
  - fp8
  - cuda
# TensorRT-LLM (repo NVIDIA/TensorRT-LLM).
# Every kernel_paths entry sits under cpp/tensorrt_llm/kernels.
# NOTE(review): the parent directory is listed together with six of its own
# subdirectories; presumably redundant under prefix matching — confirm against
# the consumer.
- id: tensorrt-llm
  name: TensorRT-LLM
  repo: NVIDIA/TensorRT-LLM
  url: https://github.com/NVIDIA/TensorRT-LLM
  kernel_paths:
  - cpp/tensorrt_llm/kernels
  - cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention
  - cpp/tensorrt_llm/kernels/mixtureOfExperts
  - cpp/tensorrt_llm/kernels/quantization
  - cpp/tensorrt_llm/kernels/cutlass_kernels
  - cpp/tensorrt_llm/kernels/internal_cutlass_kernels
  - cpp/tensorrt_llm/kernels/userbuffers
  tags:
  - llm-serving
  - cutlass
  - fmha
  - moe
  - fp8
  - fp4
  - tma
  - wgmma
  - nvls
  - userbuffers
# PyTorch (repo pytorch/pytorch).
# kernel_paths lists three directories under aten/src/ATen/native plus
# torch/_inductor and torch/csrc/distributed; none is nested inside another.
- id: pytorch
  name: PyTorch
  repo: pytorch/pytorch
  url: https://github.com/pytorch/pytorch
  kernel_paths:
  - aten/src/ATen/native/cuda
  - aten/src/ATen/native/transformers/cuda
  - aten/src/ATen/native/cudnn
  - torch/_inductor
  - torch/csrc/distributed
  tags:
  - aten
  - elementwise
  - reduction
  - attention
  - sdpa
  - cudnn
  - inductor
  - nccl
# FlashAttention (repo Dao-AILab/flash-attention).
# kernel_paths mixes directories, one header file, and two benchmark scripts.
- id: flash-attention
  name: FlashAttention
  repo: Dao-AILab/flash-attention
  url: https://github.com/Dao-AILab/flash-attention
  kernel_paths:
  - csrc/flash_attn/src
  # NOTE(review): this header lives inside csrc/flash_attn/src listed above;
  # presumably redundant under prefix matching — confirm.
  - csrc/flash_attn/src/flash_fwd_kernel.h
  - flash_attn/cute
  - hopper
  # Individual benchmark scripts, not directories.
  - benchmarks/benchmark_attn.py
  - benchmarks/bench_sm90.py
  tags:
  - attention
  - fmha
  - online-softmax
  - tma
  - wgmma
  - hopper
  - sm80
  - sm90
  - cutlass
  - cute
# FlashInfer (repo flashinfer-ai/flashinfer).
# kernel_paths covers csrc, include/flashinfer, python/flashinfer, and one
# benchmark script.
# NOTE(review): csrc is listed together with csrc/fmha_v2/fmha and two of that
# directory's children; presumably redundant under prefix matching — confirm.
- id: flashinfer
  name: FlashInfer
  repo: flashinfer-ai/flashinfer
  url: https://github.com/flashinfer-ai/flashinfer
  kernel_paths:
  - csrc
  - csrc/fmha_v2/fmha
  - csrc/fmha_v2/fmha/hopper
  - csrc/fmha_v2/fmha/warpspec
  - include/flashinfer
  # Single benchmark script, not a directory.
  - benchmarks/bench_blackwell_attention.py
  - python/flashinfer
  tags:
  - attention
  - fmha
  - kv-cache
  - page-table
  - sampling
  - speculative
  - hopper
  - blackwell
# CUTLASS / CuTe (repo NVIDIA/cutlass).
# kernel_paths lists six top-level areas of the repository.
- id: cutlass
  name: CUTLASS / CuTe
  repo: NVIDIA/cutlass
  url: https://github.com/NVIDIA/cutlass
  kernel_paths:
  - include/cutlass
  - include/cute
  - examples
  - tools/profiler
  - python
  # NOTE(review): by its name this is a documentation path rather than kernel
  # source — confirm it is meant to be under kernel_paths.
  - media/docs
  tags:
  - gemm
  - matmul
  - epilogue
  - cute
  - tma
  - wgmma
  - block-scaled
  - fp8
  - fp4
# CCCL (repo NVIDIA/cccl).
# The id names only CUB, while kernel_paths covers three top-level
# directories: cub, thrust, and libcudacxx.
- id: cccl-cub
  name: CCCL (CUB / Thrust / libcu++)
  repo: NVIDIA/cccl
  url: https://github.com/NVIDIA/cccl
  kernel_paths:
  - cub
  - thrust
  - libcudacxx
  tags:
  - reduction
  - scan
  - sort
  - block
  - warp
  - cooperative-groups
# Triton (repo triton-lang/triton).
# kernel_paths lists three directories under python/ plus lib/Conversion and
# test/TritonGPU; none is nested inside another.
- id: triton
  name: Triton
  repo: triton-lang/triton
  url: https://github.com/triton-lang/triton
  kernel_paths:
  - python/tutorials
  - python/triton/language
  - python/triton/runtime
  - lib/Conversion
  - test/TritonGPU
  tags:
  - triton
  - block-pointer
  - tma
  - atomic
  - autotune
  - softmax
  - matmul
# DeepGEMM (repo deepseek-ai/DeepGEMM).
# kernel_paths lists four directories under csrc, two under deep_gemm, and the
# top-level tests directory; none is nested inside another.
- id: deepgemm
  name: DeepGEMM
  repo: deepseek-ai/DeepGEMM
  url: https://github.com/deepseek-ai/DeepGEMM
  kernel_paths:
  - csrc/apis
  - csrc/jit
  - csrc/jit_kernels/heuristics
  - csrc/jit_kernels/impls
  - deep_gemm/include/deep_gemm
  - deep_gemm/testing
  - tests
  tags:
  - fp8
  - block-scaled
  - grouped-gemm
  - moe-gemm
  - jit
  - hopper
  - blackwell
# ThunderKittens (repo HazyResearch/ThunderKittens).
# kernel_paths has only two entries: one header file and the examples
# directory.
- id: thunderkittens
  name: ThunderKittens
  repo: HazyResearch/ThunderKittens
  url: https://github.com/HazyResearch/ThunderKittens
  kernel_paths:
  # Single header file rather than a directory.
  - include/kittens.cuh
  - examples
  tags:
  - tile-primitives
  - attention
  - matmul
  - warpgroup
  - hopper
# TileLang (repo tile-ai/tilelang).
# kernel_paths covers examples, the python/tilelang package, tests, and
# benchmark.
# NOTE(review): examples is listed with examples/deepseek_mla, and
# python/tilelang with two of its subdirectories; presumably redundant under
# prefix matching — confirm against the consumer.
- id: tilelang
  name: TileLang
  repo: tile-ai/tilelang
  url: https://github.com/tile-ai/tilelang
  kernel_paths:
  - examples
  - examples/deepseek_mla
  - python/tilelang
  - python/tilelang/language
  - python/tilelang/engine
  - tests
  - benchmark
  tags:
  - tile-dsl
  - schedule
  - matmul
  - attention
  - fused-op
  - moe
  - mla
# CuTe DSL. Shares repo and url (NVIDIA/cutlass) with the `cutlass` entry in
# this file; only id, name, kernel_paths, and tags differ.
- id: cute-dsl
  name: CuTe DSL
  repo: NVIDIA/cutlass
  url: https://github.com/NVIDIA/cutlass
  kernel_paths:
  - python
  - include/cute
  - examples
  # NOTE(review): the next three entries end in `*`; this is the only entry
  # visible here that uses glob patterns. Confirm the consumer expands globs
  # rather than treating them as literal paths. They also fall under
  # `examples`, listed above.
  - examples/48_hopper_*
  - examples/50_blackwell_*
  - examples/60_*
  - test/unit
  tags:
  - cute-dsl
  - cutlass
  - gemm
  - attention
  - tma
  - wgmma
  - tcgen05
  - sm90
  - sm100
# QuACK (repo Dao-AILab/quack).
# kernel_paths lists six top-level directories, including tests and docs.
- id: quack
  name: QuACK
  repo: Dao-AILab/quack
  url: https://github.com/Dao-AILab/quack
  kernel_paths:
  - quack
  - benchmarks
  - microbenchmarks
  - examples
  - tests
  - docs
  tags:
  - cute-dsl
  - quack
  - gemm
  - softmax
  - norm
  - cross-entropy
  - hopper
  - blackwell
# DeepSeek TileKernels (repo deepseek-ai/TileKernels).
# kernel_paths lists two directories and the top-level README.
- id: tilekernels
  name: DeepSeek TileKernels
  repo: deepseek-ai/TileKernels
  url: https://github.com/deepseek-ai/TileKernels
  kernel_paths:
  - tile_kernels
  - tests
  # Single documentation file rather than a source directory.
  - README.md
  tags:
  - tilelang
  - deepseek
  - moe
  - quantization
  - transpose
  - swiglu
  - engram
# SourceFind LightOp (repo OpenDAS/lightop).
# Unlike the github.com entries in this file, url points at
# developer.sourcefind.cn.
- id: sourcefind-lightop
  name: SourceFind LightOp
  repo: OpenDAS/lightop
  url: https://developer.sourcefind.cn/codes/OpenDAS/lightop
  kernel_paths:
  # NOTE(review): lightop/csrc is listed before its parent `lightop`;
  # presumably redundant under prefix matching — confirm.
  - lightop/csrc
  - lightop
  - test
  # Two build scripts are listed as individual files.
  - setup.py
  - setup_torch29.py
  tags:
  - lightop
  - dcu
  - rocm
  - hip
  - fused-op
  - gfx928
  - gfx936
  - gfx938
# DCU Toolkit Flash Attention CUTLASS.
# repo has three path segments, unlike the owner/name form used by the other
# entries in this file.
# NOTE(review): url uses plain http and a bare IP address with an explicit
# port; confirm the host is reachable wherever the corpus refresh runs.
# NOTE(review): `deeplearing` is spelled the same way in repo and url, so it
# is kept as-is; verify against the actual remote path before changing it.
- id: flash-attention-cutlass
  name: DCU Toolkit Flash Attention CUTLASS
  repo: dcutoolkit/deeplearing/flash-attention-cutlass
  url: http://42.228.13.241:10068/dcutoolkit/deeplearing/flash-attention-cutlass
  kernel_paths:
  - csrc
  - cutlass
  - flash_attn
  - tests
  - benchmarks
  # Single build script rather than a directory.
  - setup.py
  tags:
  - flash-attention
  - attention
  - cutlass
  - dcu
  - rocm
  - hip