#pragma once #include std::tuple, std::vector> bin_assignment_cuda(torch::Tensor rowcount, torch::Tensor binptr); std::tuple padded_index_select_cuda(torch::Tensor src, torch::Tensor rowptr, torch::Tensor col, torch::Tensor index, int64_t length, torch::Tensor fill_value); // std::tuple padded_index_select_cuda2( // torch::Tensor src, torch::Tensor rowptr, torch::Tensor col, // torch::Tensor bin, torch::Tensor index, std::vector node_counts, // std::vector lengths, torch::Tensor fill_value);