#pragma once #include torch::Tensor knn_cuda(torch::Tensor x, torch::Tensor y, torch::optional ptr_x, torch::optional ptr_y, int64_t k, bool cosine);