moe_sum.h 327 Bytes
Newer Older
Xiaowei.zhang's avatar
Xiaowei.zhang committed
1
2
3
4
5
6
7
8
9
#pragma once
// SPDX-License-Identifier: MIT
 
#include <torch/extension.h>
#include "aiter_enum.h"

// Forward declaration of the MoE sum entry point; the definition is not in this header.
//
// NOTE(review): the name suggests an assembly-backed routine that accumulates
// per-expert data from `input` into `output` -- confirm against the implementation.
//
// Parameters (shape annotations are the ones recorded with this declaration):
//   input      -- [experts, block_size, hidden_size]
//   output     -- [num_tokens, hidden_size]
//   sorted_ids -- shape and meaning are not documented here; TODO confirm with the definition.
//
// All three tensors are taken by non-const reference.
void asm_moe_sum(
    torch::Tensor& input,
    torch::Tensor& output,
    torch::Tensor& sorted_ids);