Commit 918b1163 authored by rusty1s's avatar rusty1s
Browse files

conversion utilities

parent 2ae73b17
...@@ -4,20 +4,19 @@ ...@@ -4,20 +4,19 @@
#define CHECK_CPU(x) AT_ASSERTM(!x.type().is_cuda(), #x " must be CPU tensor") #define CHECK_CPU(x) AT_ASSERTM(!x.type().is_cuda(), #x " must be CPU tensor")
at::Tensor rowptr(at::Tensor row, int64_t M) { at::Tensor ind2ptr(at::Tensor ind, int64_t M) {
CHECK_CPU(row); CHECK_CPU(ind);
AT_ASSERTM(row.dim() == 1, "Row needs to be one-dimensional"); auto out = at::empty(M + 1, ind.options());
auto ind_data = ind.DATA_PTR<int64_t>();
auto out = at::empty(M + 1, row.options());
auto row_data = row.DATA_PTR<int64_t>();
auto out_data = out.DATA_PTR<int64_t>(); auto out_data = out.DATA_PTR<int64_t>();
int64_t numel = row.numel(), idx = row_data[0], next_idx; int64_t numel = ind.numel(), idx = ind_data[0], next_idx;
for (int64_t i = 0; i <= idx; i++) for (int64_t i = 0; i <= idx; i++)
out_data[i] = 0; out_data[i] = 0;
for (int64_t i = 0; i < numel - 1; i++) { for (int64_t i = 0; i < numel - 1; i++) {
next_idx = row_data[i + 1]; next_idx = ind_data[i + 1];
for (int64_t j = idx; j < next_idx; j++) for (int64_t j = idx; j < next_idx; j++)
out_data[j + 1] = i + 1; out_data[j + 1] = i + 1;
idx = next_idx; idx = next_idx;
...@@ -29,6 +28,24 @@ at::Tensor rowptr(at::Tensor row, int64_t M) { ...@@ -29,6 +28,24 @@ at::Tensor rowptr(at::Tensor row, int64_t M) {
return out; return out;
} }
// Expands a CSR-style compressed pointer tensor `ptr` (length M + 1) back
// into a per-element index tensor of length E — the inverse of ind2ptr:
// out[e] = i for every e in [ptr[i], ptr[i + 1]).
//
// Preconditions (assumed, not checked): ptr is a one-dimensional int64 CPU
// tensor with non-decreasing entries, and its last entry equals E —
// TODO(review): confirm against callers.
at::Tensor ptr2ind(at::Tensor ptr, int64_t E) {
  CHECK_CPU(ptr);

  auto out = at::empty(E, ptr.options());

  // An empty pointer tensor has no segments to expand; without this guard
  // the `ptr_data[0]` read below would be out of bounds.
  if (ptr.numel() == 0)
    return out;

  auto ptr_data = ptr.DATA_PTR<int64_t>();
  auto out_data = out.DATA_PTR<int64_t>();

  int64_t idx = ptr_data[0], next_idx;
  for (int64_t i = 0; i < ptr.numel() - 1; i++) {
    next_idx = ptr_data[i + 1];
    // Every output position in [idx, next_idx) belongs to segment i.
    for (int64_t e = idx; e < next_idx; e++)
      out_data[e] = i;
    idx = next_idx;
  }

  return out;
}
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
m.def("rowptr", &rowptr, "Rowptr (CPU)"); m.def("ind2ptr", &ind2ptr, "Ind2Ptr (CPU)");
m.def("ptr2ind", &ptr2ind, "Ptr2Ind (CPU)");
} }
#include <torch/extension.h>
#define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be CUDA tensor")
at::Tensor ind2ptr_cuda(at::Tensor ind, int64_t M);
at::Tensor ptr2ind_cuda(at::Tensor ptr, int64_t E);
// CUDA entry point: converts a sorted index tensor `ind` into a CSR-style
// pointer tensor of length M + 1.  Validates only the device placement here;
// all real work happens in ind2ptr_cuda.
at::Tensor ind2ptr(at::Tensor ind, int64_t M) {
  CHECK_CUDA(ind);
  return ind2ptr_cuda(ind, M);
}
// CUDA entry point: expands a CSR-style pointer tensor `ptr` back into a
// per-element index tensor of length E (inverse of ind2ptr).  Validates only
// the device placement here; all real work happens in ptr2ind_cuda.
at::Tensor ptr2ind(at::Tensor ptr, int64_t E) {
  CHECK_CUDA(ptr);
  return ptr2ind_cuda(ptr, E);
}
// Python bindings: exposes the CUDA conversion utilities as
// `torch_sparse.convert_cuda.ind2ptr` / `.ptr2ind`.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("ind2ptr", &ind2ptr, "Ind2Ptr (CUDA)");
  m.def("ptr2ind", &ptr2ind, "Ptr2Ind (CUDA)");
}
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include "compat.cuh"
#define THREADS 1024
// Converts a sorted index vector into CSR-style pointers in parallel.
// Expected launch: at least numel + 1 threads total (block-rounding slack is
// harmless — extra threads fall through every branch).
//  - thread 0 zero-fills the leading slots out[0 .. ind[0]];
//  - thread t (0 < t < numel) fills the slots between ind[t-1] and ind[t];
//  - thread `numel` fills the trailing slots out[ind[numel-1]+1 .. M].
// Assumes ind_data is sorted ascending with values in [0, M) and out_data
// has M + 1 entries — TODO(review): confirm caller guarantees.
// NOTE(review): numel == 0 makes thread 0 read ind_data[0] out of bounds;
// the host wrapper must not launch this kernel for empty input.
__global__ void ind2ptr_kernel(const int64_t *ind_data, int64_t *out_data,
                               int64_t M, int64_t numel) {
  int64_t thread_idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (thread_idx == 0) {
    // Prefix: every pointer slot up to the first index is 0.
    for (int64_t i = 0; i <= ind_data[0]; i++)
      out_data[i] = 0;
  } else if (thread_idx < numel) {
    // Interior: the gap between consecutive indices marks where the pointer
    // value advances to this thread's position.
    for (int64_t i = ind_data[thread_idx - 1]; i < ind_data[thread_idx]; i++)
      out_data[i + 1] = thread_idx;
  } else if (thread_idx == numel) {
    // Suffix: all slots past the last index point to the end (numel).
    for (int64_t i = ind_data[numel - 1] + 1; i < M + 1; i++)
      out_data[i] = numel;
  }
}
// Host wrapper for ind2ptr_kernel: builds the length-(M + 1) CSR pointer
// tensor for a sorted index tensor `ind` on the current CUDA stream.
// Launches ind.numel() + 1 worker threads (see kernel for the work split).
at::Tensor ind2ptr_cuda(at::Tensor ind, int64_t M) {
  auto out = at::empty(M + 1, ind.options());

  // Empty input: every pointer slot is zero.  Returning early also prevents
  // the kernel's thread 0 from reading ind_data[0] out of bounds.
  if (ind.numel() == 0)
    return out.zero_();

  auto ind_data = ind.DATA_PTR<int64_t>();
  auto out_data = out.DATA_PTR<int64_t>();

  auto stream = at::cuda::getCurrentCUDAStream();
  ind2ptr_kernel<<<(ind.numel() + 2 + THREADS - 1) / THREADS, THREADS, 0,
                   stream>>>(ind_data, out_data, M, ind.numel());
  return out;
}
// Expands CSR-style pointers back into per-element indices in parallel:
// thread i writes out[e] = i for every e in [ptr[i], ptr[i + 1]).
// A pointer array of numel entries describes numel - 1 segments, so only
// threads with thread_idx < numel - 1 do work.  The previous guard
// (thread_idx < numel) let the last launched thread read ptr_data[numel],
// one element past the end of the array — and then scribble out_data with a
// garbage segment bound.  `E` is kept for interface compatibility; bounds on
// out_data are implied by the pointer values.
__global__ void ptr2ind_kernel(const int64_t *ptr_data, int64_t *out_data,
                               int64_t E, int64_t numel) {
  int64_t thread_idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (thread_idx < numel - 1) {
    int64_t idx = ptr_data[thread_idx], next_idx = ptr_data[thread_idx + 1];
    for (int64_t i = idx; i < next_idx; i++) {
      out_data[i] = thread_idx;
    }
  }
}
// Host wrapper for ptr2ind_kernel: builds the length-E index tensor from a
// CSR pointer tensor `ptr` on the current CUDA stream.
at::Tensor ptr2ind_cuda(at::Tensor ptr, int64_t E) {
  auto out = at::empty(E, ptr.options());

  // Fewer than two pointer entries means there are no segments to expand.
  // Returning early also avoids launching with a zero-sized grid when
  // ptr.numel() == 0 (a grid dimension of 0 is an invalid configuration).
  if (ptr.numel() <= 1)
    return out;

  auto ptr_data = ptr.DATA_PTR<int64_t>();
  auto out_data = out.DATA_PTR<int64_t>();

  auto stream = at::cuda::getCurrentCUDAStream();
  ptr2ind_kernel<<<(ptr.numel() + THREADS - 1) / THREADS, THREADS, 0, stream>>>(
      ptr_data, out_data, E, ptr.numel());
  return out;
}
#include <torch/extension.h>
#define CHECK_CUDA(x) AT_ASSERTM(x.type().is_cuda(), #x " must be CUDA tensor")
at::Tensor rowptr_cuda(at::Tensor row, int64_t M);
// CUDA entry point: converts a sorted row-index tensor into a CSR row
// pointer tensor of length M + 1.  Validates only the device placement here;
// all real work happens in rowptr_cuda.
at::Tensor rowptr(at::Tensor row, int64_t M) {
  CHECK_CUDA(row);
  return rowptr_cuda(row, M);
}
// Python bindings: exposes the CUDA rowptr conversion to Python.
PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("rowptr", &rowptr, "Rowptr (CUDA)");
}
#include <ATen/ATen.h>
#include <ATen/cuda/CUDAContext.h>
#include "compat.cuh"
#define THREADS 1024
// Converts a sorted row-index vector into CSR row pointers in parallel.
// Expected launch: at least numel + 1 threads total (block-rounding slack is
// harmless — extra threads fall through every branch).
//  - thread 0 zero-fills the leading slots out[0 .. row[0]];
//  - thread t (0 < t < numel) fills the slots between row[t-1] and row[t];
//  - thread `numel` fills the trailing slots out[row[numel-1]+1 .. M].
// Assumes row_data is sorted ascending with values in [0, M) and out_data
// has M + 1 entries — TODO(review): confirm caller guarantees.
// NOTE(review): numel == 0 makes thread 0 read row_data[0] out of bounds;
// the host wrapper must not launch this kernel for empty input.
__global__ void rowptr_kernel(const int64_t *row_data, int64_t *out_data,
                              int64_t M, int64_t numel) {
  int64_t thread_idx = blockDim.x * blockIdx.x + threadIdx.x;
  if (thread_idx == 0) {
    // Prefix: every pointer slot up to the first row index is 0.
    for (int64_t i = 0; i <= row_data[0]; i++)
      out_data[i] = 0;
  } else if (thread_idx < numel) {
    // Interior: the gap between consecutive row indices marks where the
    // pointer value advances to this thread's position.
    for (int64_t i = row_data[thread_idx - 1]; i < row_data[thread_idx]; i++)
      out_data[i + 1] = thread_idx;
  } else if (thread_idx == numel) {
    // Suffix: all slots past the last row index point to the end (numel).
    for (int64_t i = row_data[numel - 1] + 1; i < M + 1; i++)
      out_data[i] = numel;
  }
}
// Host wrapper for rowptr_kernel: builds the length-(M + 1) CSR row pointer
// tensor for a sorted row-index tensor `row` on the current CUDA stream.
// Launches row.numel() + 1 worker threads (see kernel for the work split).
at::Tensor rowptr_cuda(at::Tensor row, int64_t M) {
  AT_ASSERTM(row.dim() == 1, "Row needs to be one-dimensional");

  auto out = at::empty(M + 1, row.options());

  // Empty input: every pointer slot is zero.  Returning early also prevents
  // the kernel's thread 0 from reading row_data[0] out of bounds.
  if (row.numel() == 0)
    return out.zero_();

  auto row_data = row.DATA_PTR<int64_t>();
  auto out_data = out.DATA_PTR<int64_t>();

  auto stream = at::cuda::getCurrentCUDAStream();
  rowptr_kernel<<<(row.numel() + 2 + THREADS - 1) / THREADS, THREADS, 0,
                  stream>>>(row_data, out_data, M, row.numel());
  return out;
}
...@@ -3,12 +3,12 @@ import warnings ...@@ -3,12 +3,12 @@ import warnings
import torch import torch
from torch_scatter import segment_csr, scatter_add from torch_scatter import segment_csr, scatter_add
from torch_sparse import rowptr_cpu from torch_sparse import convert_cpu
try: try:
from torch_sparse import rowptr_cuda from torch_sparse import convert_cuda
except ImportError: except ImportError:
rowptr_cuda = None convert_cuda = None
__cache__ = {'enabled': True} __cache__ = {'enabled': True}
...@@ -159,8 +159,8 @@ class SparseStorage(object): ...@@ -159,8 +159,8 @@ class SparseStorage(object):
@property @property
def row(self): def row(self):
if self._row is None: if self._row is None:
# TODO func = convert_cuda if self.rowptr.is_cuda else convert_cpu
pass self._row = func.ptr2ind(self.rowptr, self.nnz())
return self._row return self._row
def has_rowptr(self): def has_rowptr(self):
...@@ -169,8 +169,8 @@ class SparseStorage(object): ...@@ -169,8 +169,8 @@ class SparseStorage(object):
@property @property
def rowptr(self): def rowptr(self):
if self._rowptr is None: if self._rowptr is None:
func = rowptr_cuda if self.row.is_cuda else rowptr_cpu func = convert_cuda if self.row.is_cuda else convert_cpu
self._rowptr = func.rowptr(self.row, self.sparse_size[0]) self._rowptr = func.ind2ptr(self.row, self.sparse_size[0])
return self._rowptr return self._rowptr
@property @property
...@@ -258,6 +258,9 @@ class SparseStorage(object): ...@@ -258,6 +258,9 @@ class SparseStorage(object):
colcount=colcount, csr2csc=self._csr2csc, colcount=colcount, csr2csc=self._csr2csc,
csc2csr=self._csc2csr, is_sorted=True) csc2csr=self._csc2csr, is_sorted=True)
def nnz(self):
return self.col.numel()
def has_rowcount(self): def has_rowcount(self):
return self._rowcount is not None return self._rowcount is not None
...@@ -271,8 +274,8 @@ class SparseStorage(object): ...@@ -271,8 +274,8 @@ class SparseStorage(object):
@cached_property @cached_property
def colptr(self): def colptr(self):
if self.has_csr2csc(): if self.has_csr2csc():
func = rowptr_cuda if self.col.is_cuda else rowptr_cpu func = convert_cuda if self.col.is_cuda else convert_cpu
return func.rowptr(self.col[self.csr2csc], self.sparse_size[1]) return func.ind2ptr(self.col[self.csr2csc], self.sparse_size[1])
else: else:
colptr = self.col.new_zeros(self.sparse_size[1] + 1) colptr = self.col.new_zeros(self.sparse_size[1] + 1)
torch.cumsum(self.colcount, dim=0, out=colptr[1:]) torch.cumsum(self.colcount, dim=0, out=colptr[1:])
......
...@@ -178,7 +178,7 @@ class SparseTensor(object): ...@@ -178,7 +178,7 @@ class SparseTensor(object):
return self.size() return self.size()
def nnz(self): def nnz(self):
return self.storage.index.size(1) return self.storage.nnz()
def density(self): def density(self):
return self.nnz() / (self.sparse_size(0) * self.sparse_size(1)) return self.nnz() / (self.sparse_size(0) * self.sparse_size(1))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment