#pragma once #include std::tuple> segment_csr_cuda(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out, std::string reduce); torch::Tensor gather_csr_cuda(torch::Tensor src, torch::Tensor indptr, torch::optional optional_out); template __device__ T __ldg(const T* ptr) { return *ptr; }