// Declarations for two functions whose definitions are not in this file
// (the `_cuda` suffix suggests CUDA-backed implementations -- TODO confirm):
//   - bin_assignment_cuda(rowcount, bin_strategy)
//   - padded_index_select_cuda(src, rowptr, col, index, length)
// Every tensor parameter is taken by value as torch::Tensor; `length` is an
// int64_t.
//
// NOTE(review): this header appears to have lost its line breaks and the
// contents of its angle brackets: the #include names no header, and
// std::vector / std::tuple carry no template arguments. As written, all of
// the text after `#pragma once` sits on the same preprocessor-directive line,
// so nothing is actually included or declared. Restore the original layout
// and the missing <...> parts from the upstream file and the matching
// definitions rather than guessing them here.
#pragma once #include std::vector bin_assignment_cuda(torch::Tensor rowcount, torch::Tensor bin_strategy); std::tuple padded_index_select_cuda(torch::Tensor src, torch::Tensor rowptr, torch::Tensor col, torch::Tensor index, int64_t length);