{
  "sglang": {
    "llama": {
      "gemm|nvjet": "gemm",
      "fused_moe_kernel|GroupProblemShape|group_gemm_starts|bmm_|GemmUniversal": "moe_gemm",
      "moe|sigmoid": "moe",
      "CatArrayBatched|prepare_inputs": "prepare_next",
      "ncclDevKernel|cross_device_reduce": "nccl_and_custom_ar",
      "_norm_|Norm": "norm",
      "topk": "topk",
      "act_and_mul_": "activation",
      "Rotary": "rope",
      "SoftMax": "softmax",
      "flash|fmha": "attn",
      "elementwise": "elementwise",
      "fp8_quant|cvt_|quantize": "quantize",
      "reduce_kernel": "reduce",
      "triton": "triton_kernel",
      "CUDA mem": "non-gpu-H_D_memops",
      ".*": "misc"
    },
    "ds": {
      "block_fp8_matmul": "block_fp8_gemm",
      "gemm|matmul|nvjet": "gemm",
      "fused_moe_kernel": "moe_gemm",
      "moe|expert|sigmoid": "moe",
      "CatArrayBatched|write_req_to": "prepare_next",
      "ncclDevKernel|cross_device_reduce|all_gather": "nccl_and_custom_ar",
      "Norm": "norm",
      "topk": "topk",
      "activation|act_and_mul": "activation",
      "compute_position_kernel": "rope",
      "elementwise": "elementwise",
      "fp8_quant|quant_fp8|quantize": "quantize",
      "SoftMax": "softmax",
      "reduce": "reduce",
      "_fwd_|create_flash|::mla::|KVCache": "attn",
      "CUDA mem": "non-gpu-H_D_memops",
      ".*": "misc"
    },
    "gpt-oss": {
      "gemm|nvjet": "gemm",
      "fused_moe_kernel|_group_gemm|GroupProblemShape|GemmUniversal|bmm_|matmul_ogs_|_topk_forward|_combined_routing|_sum_bitmatrix_rows|_compute_writeback_idx": "moe_gemm",
      "moe|sigmoid": "moe",
      "CatArrayBatched|prepare_inputs": "prepare_next",
      "_norm_|Norm": "norm",
      "ncclDevKernel|cross_device_reduce|allreduce": "nccl_and_custom_ar",
      "topk|TopK": "topk",
      "act_and_mul_": "activation",
      "Rotary": "rope",
      "SoftMax": "softmax",
      "flash|fmha": "attn",
      "elementwise": "elementwise",
      "fp8_quant|cvt_|quantize": "quantize",
      "reduce_kernel": "reduce",
      "triton": "triton_kernel",
      "CUDA mem": "non-gpu-H_D_memops",
      ".*": "misc"
    }
  }
}