## Canonical Aliases
## Maps canonical terms to known aliases for search and normalization.
## Format: canonical_term: [alias1, alias2, ...]
##
## Each top-level key is a canonical term; its value is the list of alias
## strings for that term. Aliases that differ only by letter case are listed
## separately.

# Hardware features
tcgen05:
  - UMMA
  - tensor_core_gen05
  - "tensor core generation 5"
  - tcgen05.mma

tmem:
  - tensor-memory
  - "tensor memory"
  - TMEM

clc:
  - "cluster launch control"
  - "Cluster Launch Control"
  - CLC

tma:
  - "tensor memory accelerator"
  - "Tensor Memory Accelerator"
  - TMA
  - cp.async.bulk

2sm-cooperative:
  - "2-SM cooperative"
  - "two-SM cooperative"
  - "dual CTA"
  - "2CTA"
  # Quoted on purpose: the value contains "::".
  - "cta_group::2"

pdl:
  - "programmatic dependent launch"
  - "Programmatic Dependent Launch"
  - PDL

gdc:
  - "grid dependency control"
  - "Grid Dependency Control"
  - GDC

nvfp4:
  - NVFP4
  - "nv_float4"
  - E2M1
  - "FP4 E2M1"

wgmma:
  - "wgmma.mma_async"
  - WGMMA

block-scale:
  - "block scaling"
  - "block-scaled"
  - UE8M0
  - microscaling
  - MX

# Architecture names
sm100:
  - Blackwell
  - blackwell
  - B200
  - B100
  - GB200
  - GB300
  - "SM100"

sm90:
  - Hopper
  - hopper
  - H100
  - H200
  - H800
  - "SM90"

dcu:
  - Hygon
  - hygon
  - DCU
  - "Hygon DCU"

wave64:
  - wavefront64
  - "wavefront size 64"
  - wavefront

mmac:
  - MMOP
  - "matrix core"
  - "v_mmac"
  - HCU

# NOTE(review): the entries from ck-tile through triton-distributed look like
# kernel libraries/frameworks rather than architecture names -- consider giving
# them their own section header. TODO confirm the intended grouping.
ck-tile:
  - "CK Tile"
  - "Composable Kernel"

aiter:
  - AITER
  - "AI Tensor Engine"

aotriton:
  - AOTriton
  - "AOT Triton"
  - "ahead of time triton"

conch:
  - Conch
  - conch-triton-kernels

flaggems:
  - FlagGems
  - flag_gems

liger-kernel:
  - Liger
  - "Liger Kernel"

huggingface-kernels:
  - "Hugging Face kernels"
  - kernels-community
  - hf-kernels

triton-distributed:
  - Triton-distributed
  - triton_dist
  - "triton distributed"

# Kernel types
moe:
  - MoE
  - "mixture of experts"
  - "Mixture of Experts"
  - "expert parallelism"

mla:
  - MLA
  - "multi-head latent attention"
  - "Multi-head Latent Attention"

gated-delta-net:
  - GDN
  - GatedDeltaNet
  - "Gated Delta Net"
  - "gated delta rule"

sparse-attention:
  - NSA
  - "native sparse attention"
  - "Native Sparse Attention"

# Technique aliases
warp-specialization:
  - "warp specialization"
  - warp-spec
  - "warp specialized"

persistent-kernel:
  - "persistent kernels"
  - "persistent scheduling"

fine-grained-quantization:
  - "tile-wise scaling"
  - "block-wise scaling"
  - "per-tile quantization"

cuda-core-promotion:
  - "Nc=128 promotion"
  - "FP22 accumulator fix"
  - "accumulation promotion"