---
# Framework metadata for the local PR diff corpus ("Route A").
#
# Layout:
#   schema_version  integer version of this document's schema
#   description     free-text summary of what this file is for
#   frameworks      list of entries, each with:
#     id            short identifier for the entry
#     name          human-readable display name
#     repo          repository path (owner/name, or a longer group path)
#     url           web location of the repository
#     kernel_paths  repository-relative files/directories of interest
#     tags          free-form topic labels
#
# NOTE(review): how kernel_paths and tags are interpreted is decided by the
# consuming script named in the description, which is not visible here.

schema_version: 1

# Folded scalar: the lines below are joined with single spaces into one string.
description: >-
  Framework metadata for repositories covered by the local PR diff corpus
  (Route A). These entries are intentionally NOT in index.json, which scopes
  Route B to complementary code repositories. Used by
  scripts/expand-pr-corpus.py to refresh the PR pipeline.

frameworks:
  # --- LLM serving stacks ---------------------------------------------------
  - id: sglang
    name: SGLang
    repo: sgl-project/sglang
    url: https://github.com/sgl-project/sglang
    kernel_paths:
      - python/sglang/srt/layers/attention
      - python/sglang/srt/layers/moe
      - python/sglang/srt/layers/quantization
      - python/sglang/srt/layers/sampler.py
      - sgl-kernel/csrc
      - sgl-kernel/csrc/attention
      - sgl-kernel/csrc/moe
      - sgl-kernel/csrc/elementwise
      - sgl-kernel/csrc/gemm
      - sgl-kernel/csrc/spec_decode
      - sgl-kernel/python/sgl_kernel
    tags:
      - llm-serving
      - attention
      - moe
      - fp8
      - speculative-decoding
      - sampling
      - cuda
      - triton

  - id: vllm
    name: vLLM
    repo: vllm-project/vllm
    url: https://github.com/vllm-project/vllm
    kernel_paths:
      - csrc
      - csrc/attention
      - csrc/moe
      - csrc/quantization
      - csrc/cutlass_extensions
      - vllm/attention
      - vllm/model_executor/layers
    tags:
      - llm-serving
      - paged-attention
      - moe
      - awq
      - gptq
      - fp8
      - cuda

  - id: tensorrt-llm
    name: TensorRT-LLM
    repo: NVIDIA/TensorRT-LLM
    url: https://github.com/NVIDIA/TensorRT-LLM
    kernel_paths:
      - cpp/tensorrt_llm/kernels
      - cpp/tensorrt_llm/kernels/decoderMaskedMultiheadAttention
      - cpp/tensorrt_llm/kernels/mixtureOfExperts
      - cpp/tensorrt_llm/kernels/quantization
      - cpp/tensorrt_llm/kernels/cutlass_kernels
      - cpp/tensorrt_llm/kernels/internal_cutlass_kernels
      - cpp/tensorrt_llm/kernels/userbuffers
    tags:
      - llm-serving
      - cutlass
      - fmha
      - moe
      - fp8
      - fp4
      - tma
      - wgmma
      - nvls
      - userbuffers

  # --- General-purpose framework ---------------------------------------------
  - id: pytorch
    name: PyTorch
    repo: pytorch/pytorch
    url: https://github.com/pytorch/pytorch
    kernel_paths:
      - aten/src/ATen/native/cuda
      - aten/src/ATen/native/transformers/cuda
      - aten/src/ATen/native/cudnn
      - torch/_inductor
      - torch/csrc/distributed
    tags:
      - aten
      - elementwise
      - reduction
      - attention
      - sdpa
      - cudnn
      - inductor
      - nccl

  # --- Attention libraries ---------------------------------------------------
  - id: flash-attention
    name: FlashAttention
    repo: Dao-AILab/flash-attention
    url: https://github.com/Dao-AILab/flash-attention
    kernel_paths:
      - csrc/flash_attn/src
      - csrc/flash_attn/src/flash_fwd_kernel.h
      - flash_attn/cute
      - hopper
      - benchmarks/benchmark_attn.py
      - benchmarks/bench_sm90.py
    tags:
      - attention
      - fmha
      - online-softmax
      - tma
      - wgmma
      - hopper
      - sm80
      - sm90
      - cutlass
      - cute

  - id: flashinfer
    name: FlashInfer
    repo: flashinfer-ai/flashinfer
    url: https://github.com/flashinfer-ai/flashinfer
    kernel_paths:
      - csrc
      - csrc/fmha_v2/fmha
      - csrc/fmha_v2/fmha/hopper
      - csrc/fmha_v2/fmha/warpspec
      - include/flashinfer
      - benchmarks/bench_blackwell_attention.py
      - python/flashinfer
    tags:
      - attention
      - fmha
      - kv-cache
      - page-table
      - sampling
      - speculative
      - hopper
      - blackwell

  # --- NVIDIA template / primitive libraries ---------------------------------
  # The same repository (NVIDIA/cutlass) is also listed further down under the
  # separate id `cute-dsl`, with a different set of paths and tags.
  - id: cutlass
    name: CUTLASS / CuTe
    repo: NVIDIA/cutlass
    url: https://github.com/NVIDIA/cutlass
    kernel_paths:
      - include/cutlass
      - include/cute
      - examples
      - tools/profiler
      - python
      - media/docs
    tags:
      - gemm
      - matmul
      - epilogue
      - cute
      - tma
      - wgmma
      - block-scaled
      - fp8
      - fp4

  - id: cccl-cub
    name: CCCL (CUB / Thrust / libcu++)
    repo: NVIDIA/cccl
    url: https://github.com/NVIDIA/cccl
    kernel_paths:
      - cub
      - thrust
      - libcudacxx
    tags:
      - reduction
      - scan
      - sort
      - block
      - warp
      - cooperative-groups

  # --- Kernel DSLs and GEMM / tile libraries ---------------------------------
  - id: triton
    name: Triton
    repo: triton-lang/triton
    url: https://github.com/triton-lang/triton
    kernel_paths:
      - python/tutorials
      - python/triton/language
      - python/triton/runtime
      - lib/Conversion
      - test/TritonGPU
    tags:
      - triton
      - block-pointer
      - tma
      - atomic
      - autotune
      - softmax
      - matmul

  - id: deepgemm
    name: DeepGEMM
    repo: deepseek-ai/DeepGEMM
    url: https://github.com/deepseek-ai/DeepGEMM
    kernel_paths:
      - csrc/apis
      - csrc/jit
      - csrc/jit_kernels/heuristics
      - csrc/jit_kernels/impls
      - deep_gemm/include/deep_gemm
      - deep_gemm/testing
      - tests
    tags:
      - fp8
      - block-scaled
      - grouped-gemm
      - moe-gemm
      - jit
      - hopper
      - blackwell

  - id: thunderkittens
    name: ThunderKittens
    repo: HazyResearch/ThunderKittens
    url: https://github.com/HazyResearch/ThunderKittens
    kernel_paths:
      - include/kittens.cuh
      - examples
    tags:
      - tile-primitives
      - attention
      - matmul
      - warpgroup
      - hopper

  - id: tilelang
    name: TileLang
    repo: tile-ai/tilelang
    url: https://github.com/tile-ai/tilelang
    kernel_paths:
      - examples
      - examples/deepseek_mla
      - python/tilelang
      - python/tilelang/language
      - python/tilelang/engine
      - tests
      - benchmark
    tags:
      - tile-dsl
      - schedule
      - matmul
      - attention
      - fused-op
      - moe
      - mla

  # Second entry for NVIDIA/cutlass (see `cutlass` above); ids differ.
  - id: cute-dsl
    name: CuTe DSL
    repo: NVIDIA/cutlass
    url: https://github.com/NVIDIA/cutlass
    kernel_paths:
      - python
      - include/cute
      - examples
      # The three entries ending in `*` look like glob patterns; they are
      # quoted so they are unambiguously plain strings.
      # TODO confirm the consumer expands them rather than matching literally.
      - 'examples/48_hopper_*'
      - 'examples/50_blackwell_*'
      - 'examples/60_*'
      - test/unit
    tags:
      - cute-dsl
      - cutlass
      - gemm
      - attention
      - tma
      - wgmma
      - tcgen05
      - sm90
      - sm100

  - id: quack
    name: QuACK
    repo: Dao-AILab/quack
    url: https://github.com/Dao-AILab/quack
    kernel_paths:
      - quack
      - benchmarks
      - microbenchmarks
      - examples
      - tests
      - docs
    tags:
      - cute-dsl
      - quack
      - gemm
      - softmax
      - norm
      - cross-entropy
      - hopper
      - blackwell

  - id: tilekernels
    name: DeepSeek TileKernels
    repo: deepseek-ai/TileKernels
    url: https://github.com/deepseek-ai/TileKernels
    kernel_paths:
      - tile_kernels
      - tests
      - README.md
    tags:
      - tilelang
      - deepseek
      - moe
      - quantization
      - transpose
      - swiglu
      - engram

  # --- Repositories hosted outside github.com --------------------------------
  # The two entries below use non-GitHub URLs, so `repo` is not a GitHub slug.
  # NOTE(review): confirm the consumer does not assume github.com for `repo`.
  - id: sourcefind-lightop
    name: SourceFind LightOp
    repo: OpenDAS/lightop
    url: https://developer.sourcefind.cn/codes/OpenDAS/lightop
    kernel_paths:
      - lightop/csrc
      - lightop
      - test
      - setup.py
      - setup_torch29.py
    tags:
      - lightop
      - dcu
      - rocm
      - hip
      - fused-op
      - gfx928
      - gfx936
      - gfx938

  # This URL is plain http to a bare IP address and port. The `deeplearing`
  # path segment is kept exactly as written, since it appears in both `repo`
  # and `url` and presumably matches the remote path (verify before "fixing").
  - id: flash-attention-cutlass
    name: DCU Toolkit Flash Attention CUTLASS
    repo: dcutoolkit/deeplearing/flash-attention-cutlass
    url: http://42.228.13.241:10068/dcutoolkit/deeplearing/flash-attention-cutlass
    kernel_paths:
      - csrc
      - cutlass
      - flash_attn
      - tests
      - benchmarks
      - setup.py
    tags:
      - flash-attention
      - attention
      - cutlass
      - dcu
      - rocm
      - hip