Commit 01a51d47 authored by Duc's avatar Duc
Browse files
parents be87cc98 817b767e
......@@ -10,38 +10,51 @@ env:
- CUDA_HOME=/usr/local/cuda
jobs:
# Python 3.6
- PYTHON_VERSION=3.6 TORCH_VERSION=1.7.0 IDX=cpu
- PYTHON_VERSION=3.6 TORCH_VERSION=1.7.0 IDX=cu92
- PYTHON_VERSION=3.6 TORCH_VERSION=1.7.0 IDX=cu101
- PYTHON_VERSION=3.6 TORCH_VERSION=1.7.0 IDX=cu102
- PYTHON_VERSION=3.6 TORCH_VERSION=1.7.0 IDX=cu110
- PYTHON_VERSION=3.6 TORCH_VERSION=1.6.0 IDX=cpu
- PYTHON_VERSION=3.6 TORCH_VERSION=1.6.0 IDX=cu92
- PYTHON_VERSION=3.6 TORCH_VERSION=1.6.0 IDX=cu101
- PYTHON_VERSION=3.6 TORCH_VERSION=1.6.0 IDX=cu102
- PYTHON_VERSION=3.6 TORCH_VERSION=1.5.0 IDX=cpu
- PYTHON_VERSION=3.6 TORCH_VERSION=1.5.0 IDX=cu92
- PYTHON_VERSION=3.6 TORCH_VERSION=1.5.0 IDX=cu101
- PYTHON_VERSION=3.6 TORCH_VERSION=1.5.0 IDX=cu102
# Python 3.7
- PYTHON_VERSION=3.7 TORCH_VERSION=1.7.0 IDX=cpu
- PYTHON_VERSION=3.7 TORCH_VERSION=1.7.0 IDX=cu92
- PYTHON_VERSION=3.7 TORCH_VERSION=1.7.0 IDX=cu101
- PYTHON_VERSION=3.7 TORCH_VERSION=1.7.0 IDX=cu102
- PYTHON_VERSION=3.7 TORCH_VERSION=1.7.0 IDX=cu110
- PYTHON_VERSION=3.7 TORCH_VERSION=1.6.0 IDX=cpu
- PYTHON_VERSION=3.7 TORCH_VERSION=1.6.0 IDX=cu92
- PYTHON_VERSION=3.7 TORCH_VERSION=1.6.0 IDX=cu101
- PYTHON_VERSION=3.7 TORCH_VERSION=1.6.0 IDX=cu102
- PYTHON_VERSION=3.7 TORCH_VERSION=1.5.0 IDX=cpu
- PYTHON_VERSION=3.7 TORCH_VERSION=1.5.0 IDX=cu92
- PYTHON_VERSION=3.7 TORCH_VERSION=1.5.0 IDX=cu101
- PYTHON_VERSION=3.7 TORCH_VERSION=1.5.0 IDX=cu102
# Python 3.8
- PYTHON_VERSION=3.8 TORCH_VERSION=1.7.0 IDX=cpu
- PYTHON_VERSION=3.8 TORCH_VERSION=1.7.0 IDX=cu92
- PYTHON_VERSION=3.8 TORCH_VERSION=1.7.0 IDX=cu101
- PYTHON_VERSION=3.8 TORCH_VERSION=1.7.0 IDX=cu102
- PYTHON_VERSION=3.8 TORCH_VERSION=1.7.0 IDX=cu110
- PYTHON_VERSION=3.8 TORCH_VERSION=1.6.0 IDX=cpu
- PYTHON_VERSION=3.8 TORCH_VERSION=1.6.0 IDX=cu92
- PYTHON_VERSION=3.8 TORCH_VERSION=1.6.0 IDX=cu101
- PYTHON_VERSION=3.8 TORCH_VERSION=1.6.0 IDX=cu102
- PYTHON_VERSION=3.8 TORCH_VERSION=1.5.0 IDX=cpu
- PYTHON_VERSION=3.8 TORCH_VERSION=1.5.0 IDX=cu92
- PYTHON_VERSION=3.8 TORCH_VERSION=1.5.0 IDX=cu101
- PYTHON_VERSION=3.8 TORCH_VERSION=1.5.0 IDX=cu102
jobs:
# For daily builds, we only test on *Python 3.6* and *CUDA 10.2*.
# For daily builds, we only test on Python 3.6 with CUDA 10.2/11.0.
# For deployment, we exclude *all* macOS CUDA and Windows CUDA 9.2 jobs.
exclude:
# Python 3.6
- if: not (tag is present or commit_message =~ ci-deploy) and os != osx
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.7.0 IDX=cpu
- if: not (tag is present or commit_message =~ ci-deploy) or os != linux
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.7.0 IDX=cu92
- if: not (tag is present or commit_message =~ ci-deploy) or os = osx
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.7.0 IDX=cu101
- if: os = osx
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.7.0 IDX=cu102
- if: os = osx
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.7.0 IDX=cu110
- if: not (tag is present or commit_message =~ ci-deploy) and os != osx
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.6.0 IDX=cpu
- if: not (tag is present or commit_message =~ ci-deploy) or os != linux
......@@ -50,15 +63,17 @@ jobs:
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.6.0 IDX=cu101
- if: os = osx
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.6.0 IDX=cu102
- if: not (tag is present or commit_message =~ ci-deploy) and os != osx
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.5.0 IDX=cpu
# Python 3.7
- if: not (tag is present or commit_message =~ ci-deploy)
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.7.0 IDX=cpu
- if: not (tag is present or commit_message =~ ci-deploy) or os != linux
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.5.0 IDX=cu92
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.7.0 IDX=cu92
- if: not (tag is present or commit_message =~ ci-deploy) or os = osx
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.5.0 IDX=cu101
- if: os = osx
env: PYTHON_VERSION=3.6 TORCH_VERSION=1.5.0 IDX=cu102
# Python 3.7
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.7.0 IDX=cu101
- if: not (tag is present or commit_message =~ ci-deploy) or os = osx
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.7.0 IDX=cu102
- if: not (tag is present or commit_message =~ ci-deploy) or os = osx
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.7.0 IDX=cu110
- if: not (tag is present or commit_message =~ ci-deploy)
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.6.0 IDX=cpu
- if: not (tag is present or commit_message =~ ci-deploy) or os != linux
......@@ -67,15 +82,17 @@ jobs:
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.6.0 IDX=cu101
- if: not (tag is present or commit_message =~ ci-deploy) or os = osx
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.6.0 IDX=cu102
# Python 3.8
- if: not (tag is present or commit_message =~ ci-deploy)
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.5.0 IDX=cpu
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.7.0 IDX=cpu
- if: not (tag is present or commit_message =~ ci-deploy) or os != linux
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.5.0 IDX=cu92
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.7.0 IDX=cu92
- if: not (tag is present or commit_message =~ ci-deploy) or os = osx
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.5.0 IDX=cu101
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.7.0 IDX=cu101
- if: not (tag is present or commit_message =~ ci-deploy) or os = osx
env: PYTHON_VERSION=3.7 TORCH_VERSION=1.5.0 IDX=cu102
# Python 3.8
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.7.0 IDX=cu102
- if: not (tag is present or commit_message =~ ci-deploy) or os = osx
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.7.0 IDX=cu110
- if: not (tag is present or commit_message =~ ci-deploy)
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.6.0 IDX=cpu
- if: not (tag is present or commit_message =~ ci-deploy) or os != linux
......@@ -84,14 +101,6 @@ jobs:
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.6.0 IDX=cu101
- if: not (tag is present or commit_message =~ ci-deploy) or os = osx
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.6.0 IDX=cu102
- if: not (tag is present or commit_message =~ ci-deploy)
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.5.0 IDX=cpu
- if: not (tag is present or commit_message =~ ci-deploy) or os != linux
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.5.0 IDX=cu92
- if: not (tag is present or commit_message =~ ci-deploy) or os = osx
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.5.0 IDX=cu101
- if: not (tag is present or commit_message =~ ci-deploy) or os = osx
env: PYTHON_VERSION=3.8 TORCH_VERSION=1.5.0 IDX=cu102
install:
- source script/gcc.sh
......
cmake_minimum_required(VERSION 3.0)
project(torchcluster)
set(CMAKE_CXX_STANDARD 14)
set(TORCHCLUSTER_VERSION 1.5.7)
set(TORCHCLUSTER_VERSION 1.5.8)
option(WITH_CUDA "Enable CUDA support" OFF)
......
......@@ -31,28 +31,29 @@ All included operations work on varying data types and are implemented both for
We provide pip wheels for all major OS/PyTorch/CUDA combinations, see [here](https://s3.eu-central-1.amazonaws.com/pytorch-geometric.com/whl/index.html).
#### PyTorch 1.6.0
#### PyTorch 1.7.0
To install the binaries for PyTorch 1.6.0, simply run
To install the binaries for PyTorch 1.7.0, simply run
```
pip install torch-cluster==latest+${CUDA} -f https://pytorch-geometric.com/whl/torch-1.6.0.html
pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-1.7.0+${CUDA}.html
```
where `${CUDA}` should be replaced by either `cpu`, `cu92`, `cu101` or `cu102` depending on your PyTorch installation.
where `${CUDA}` should be replaced by either `cpu`, `cu92`, `cu101`, `cu102`, or `cu110` depending on your PyTorch installation.
| | `cpu` | `cu92` | `cu101` | `cu102` |
|-------------|-------|--------|---------|---------|
| **Linux** | ✅ | ✅ | ✅ | ✅ |
| **Windows** | ✅ | ❌ | ✅ | ✅ |
| **macOS** | ✅ | | | |
| | `cpu` | `cu92` | `cu101` | `cu102` | `cu110` |
|-------------|-------|--------|---------|---------|---------|
| **Linux** | ✅ | ✅ | ✅ | ✅ | ✅ |
| **Windows** | ✅ | ❌ | ✅ | ✅ | ✅ |
| **macOS** | ✅ | | | | |
#### PyTorch 1.5.0
To install the binaries for PyTorch 1.5.0, simply run
#### PyTorch 1.6.0
To install the binaries for PyTorch 1.6.0, simply run
```
pip install torch-cluster==latest+${CUDA} -f https://pytorch-geometric.com/whl/torch-1.5.0.html
pip install torch-cluster -f https://pytorch-geometric.com/whl/torch-1.6.0+${CUDA}.html
```
where `${CUDA}` should be replaced by either `cpu`, `cu92`, `cu101` or `cu102` depending on your PyTorch installation.
......@@ -63,21 +64,7 @@ where `${CUDA}` should be replaced by either `cpu`, `cu92`, `cu101` or `cu102` d
| **Windows** | ✅ | ❌ | ✅ | ✅ |
| **macOS** | ✅ | | | |
#### PyTorch 1.4.0
To install the binaries for PyTorch 1.4.0, simply run
```
pip install torch-cluster==latest+${CUDA} -f https://pytorch-geometric.com/whl/torch-1.4.0.html
```
where `${CUDA}` should be replaced by either `cpu`, `cu92`, `cu100` or `cu101` depending on your PyTorch installation.
| | `cpu` | `cu92` | `cu100` | `cu101` |
|-------------|-------|--------|---------|---------|
| **Linux** | ✅ | ✅ | ✅ | ✅ |
| **Windows** | ✅ | ❌ | ❌ | ✅ |
| **macOS** | ✅ | | | |
**Note:** Binaries of older versions are also provided for PyTorch 1.4.0 and PyTorch 1.5.0 (following the same procedure).
### From source
......
......@@ -23,9 +23,9 @@ torch::Tensor nearest(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x,
torch::Tensor radius(torch::Tensor x, torch::Tensor y, torch::Tensor ptr_x,
torch::Tensor ptr_y, double r, int64_t max_num_neighbors);
torch::Tensor random_walk(torch::Tensor rowptr, torch::Tensor col,
torch::Tensor start, int64_t walk_length, double p,
double q);
std::tuple<torch::Tensor, torch::Tensor>
random_walk(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q);
torch::Tensor neighbor_sampler(torch::Tensor start, torch::Tensor rowptr,
int64_t count, double factor);
#include "fps_cpu.h"
#include <ATen/Parallel.h>
#include "utils.h"
inline torch::Tensor get_dist(torch::Tensor x, int64_t idx) {
return (x - x[idx]).norm(2, 1);
return (x - x[idx]).pow_(2).sum(1);
}
torch::Tensor fps_cpu(torch::Tensor src, torch::Tensor ptr, torch::Tensor ratio,
......@@ -28,27 +30,29 @@ torch::Tensor fps_cpu(torch::Tensor src, torch::Tensor ptr, torch::Tensor ratio,
auto out_ptr_data = out_ptr.data_ptr<int64_t>();
auto out_data = out.data_ptr<int64_t>();
int64_t src_start = 0, out_start = 0, src_end, out_end;
for (auto b = 0; b < batch_size; b++) {
src_end = ptr_data[b + 1], out_end = out_ptr_data[b];
auto y = src.narrow(0, src_start, src_end - src_start);
int64_t grain_size = 1; // Always parallelize over batch dimension.
at::parallel_for(0, batch_size, grain_size, [&](int64_t begin, int64_t end) {
int64_t src_start, src_end, out_start, out_end;
for (int64_t b = begin; b < end; b++) {
src_start = ptr_data[b], src_end = ptr_data[b + 1];
out_start = b == 0 ? 0 : out_ptr_data[b - 1], out_end = out_ptr_data[b];
int64_t start_idx = 0;
if (random_start) {
start_idx = rand() % y.size(0);
}
auto y = src.narrow(0, src_start, src_end - src_start);
out_data[out_start] = src_start + start_idx;
auto dist = get_dist(y, start_idx);
int64_t start_idx = 0;
if (random_start)
start_idx = rand() % y.size(0);
for (auto i = 1; i < out_end - out_start; i++) {
int64_t argmax = dist.argmax().data_ptr<int64_t>()[0];
out_data[out_start + i] = src_start + argmax;
dist = torch::min(dist, get_dist(y, argmax));
}
out_data[out_start] = src_start + start_idx;
auto dist = get_dist(y, start_idx);
src_start = src_end, out_start = out_end;
}
for (int64_t i = 1; i < out_end - out_start; i++) {
int64_t argmax = dist.argmax().data_ptr<int64_t>()[0];
out_data[out_start + i] = src_start + argmax;
dist = torch::min(dist, get_dist(y, argmax));
}
}
});
return out;
}
#include "rw_cpu.h"
#include <ATen/Parallel.h>
#include "utils.h"
torch::Tensor random_walk_cpu(torch::Tensor rowptr, torch::Tensor col,
torch::Tensor start, int64_t walk_length,
double p, double q) {
// Samples one uniform random walk (the p == q == 1 case) per start node on a
// graph given in CSR form.
//
//   rowptr, col  CSR structure: the out-edges of node v are the entries
//                col[rowptr[v]] .. col[rowptr[v + 1] - 1].
//   start        `numel` start nodes, one walk each.
//   n_out        receives the visited nodes, row-major with
//                (walk_length + 1) entries per walk; entry 0 is the start.
//   e_out        receives, per step, the index into `col` of the edge that
//                was taken, or -1 when the current node has no out-edge (the
//                walk then stays on that node); walk_length entries per walk.
void uniform_sampling(const int64_t *rowptr, const int64_t *col,
                      const int64_t *start, int64_t *n_out, int64_t *e_out,
                      const int64_t numel, const int64_t walk_length) {

  // One pre-drawn float per (walk, step); consumed read-only by the workers.
  auto rand = torch::rand({numel, walk_length});
  auto rand_data = rand.data_ptr<float>();

  // walk_length == 0 is a valid request (only the start nodes are emitted),
  // so keep it out of the divisor.
  const int64_t steps_per_walk = walk_length > 0 ? walk_length : 1;
  const int64_t grain_size = at::internal::GRAIN_SIZE / steps_per_walk;

  at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) {
    for (int64_t n = begin; n < end; n++) {
      int64_t n_cur = start[n], e_cur, row_start, row_end, idx;

      n_out[n * (walk_length + 1)] = n_cur;

      for (int64_t l = 0; l < walk_length; l++) {
        row_start = rowptr[n_cur], row_end = rowptr[n_cur + 1];
        if (row_end - row_start == 0) {
          // Dead end: record "no edge" and remain on the current node.
          e_cur = -1;
        } else {
          // Scale the uniform draw to an offset inside the neighbor range.
          idx = int64_t(rand_data[n * walk_length + l] * (row_end - row_start));
          e_cur = row_start + idx;
          n_cur = col[e_cur];
        }
        n_out[n * (walk_length + 1) + (l + 1)] = n_cur;
        e_out[n * walk_length + l] = e_cur;
      }
    }
  });
}
// Returns true when `w` occurs in the CSR adjacency list of `v`, i.e. among
// col[rowptr[v]] .. col[rowptr[v + 1] - 1]. Linear scan; the list is not
// required to be sorted.
bool inline is_neighbor(const int64_t *rowptr, const int64_t *col, int64_t v,
                        int64_t w) {
  int64_t pos = rowptr[v];
  const int64_t stop = rowptr[v + 1];
  while (pos < stop) {
    if (col[pos++] == w)
      return true;
  }
  return false;
}
// See: https://louisabraham.github.io/articles/node2vec-sampling.html
//
// Samples one biased second-order (node2vec) random walk per start node via
// rejection sampling on a graph given in CSR form.
//
//   rowptr, col  CSR structure: the out-edges of node v are the entries
//                col[rowptr[v]] .. col[rowptr[v + 1] - 1].
//   start        `numel` start nodes, one walk each.
//   n_out        visited nodes, row-major, (walk_length + 1) entries per walk.
//   e_out        index into `col` of each traversed edge, or -1 when the
//                current node has no out-edge; walk_length entries per walk.
//   p, q         return / in-out parameters. A candidate x proposed from v
//                (previous node t) is accepted with probability proportional
//                to 1/p if x == t, 1 if x has an edge to t, and 1/q otherwise.
void rejection_sampling(const int64_t *rowptr, const int64_t *col,
                        int64_t *start, int64_t *n_out, int64_t *e_out,
                        const int64_t numel, const int64_t walk_length,
                        const double p, const double q) {

  // With no steps to take only the start nodes are emitted. Returning here
  // also avoids the division by walk_length below and the unconditional
  // first-step writes, which would land outside both buffers.
  if (walk_length < 1) {
    if (walk_length == 0) {
      for (int64_t n = 0; n < numel; n++)
        n_out[n] = start[n];
    }
    return;
  }

  // Normalize the three unnormalized weights so the largest becomes 1.
  double max_prob = fmax(fmax(1. / p, 1.), 1. / q);
  double prob_0 = 1. / p / max_prob;
  double prob_1 = 1. / max_prob;
  double prob_2 = 1. / q / max_prob;

  int64_t grain_size = at::internal::GRAIN_SIZE / walk_length;
  at::parallel_for(0, numel, grain_size, [&](int64_t begin, int64_t end) {
    for (int64_t n = begin; n < end; n++) {
      // t: previous node, v: current node, x: next node.
      int64_t t = start[n], v, x, e_cur, row_start, row_end;

      n_out[n * (walk_length + 1)] = t;

      // The first step has no predecessor and is therefore sampled uniformly.
      // NOTE(review): rand() is not guaranteed to be thread-safe, yet it is
      // called from parallel_for workers here -- consider a per-thread engine.
      row_start = rowptr[t], row_end = rowptr[t + 1];
      if (row_end - row_start == 0) {
        e_cur = -1;
        v = t;
      } else {
        e_cur = row_start + (rand() % (row_end - row_start));
        v = col[e_cur];
      }
      n_out[n * (walk_length + 1) + 1] = v;
      e_out[n * walk_length] = e_cur;

      for (int64_t l = 1; l < walk_length; l++) {
        row_start = rowptr[v], row_end = rowptr[v + 1];

        if (row_end - row_start == 0) {
          // Dead end: stay on v.
          e_cur = -1;
          x = v;
        } else if (row_end - row_start == 1) {
          // Single out-edge: nothing to reject.
          e_cur = row_start;
          x = col[e_cur];
        } else {
          while (true) {
            e_cur = row_start + (rand() % (row_end - row_start));
            x = col[e_cur];

            // Dividing by RAND_MAX + 1 keeps r strictly below 1.
            double r = rand() / ((double)RAND_MAX + 1.); // [0, 1)

            // Select the single threshold that applies to this candidate. The
            // cases are mutually exclusive: a candidate rejected under its own
            // threshold must not be accepted under a different one.
            double accept;
            if (x == t)
              accept = prob_0;
            else if (is_neighbor(rowptr, col, x, t))
              accept = prob_1;
            else
              accept = prob_2;

            if (r < accept)
              break;
          }
        }

        n_out[n * (walk_length + 1) + (l + 1)] = x;
        e_out[n * walk_length + l] = e_cur;
        t = v;
        v = x;
      }
    }
  });
}
std::tuple<torch::Tensor, torch::Tensor>
random_walk_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q) {
CHECK_CPU(rowptr);
CHECK_CPU(col);
CHECK_CPU(start);
......@@ -13,35 +118,22 @@ torch::Tensor random_walk_cpu(torch::Tensor rowptr, torch::Tensor col,
CHECK_INPUT(col.dim() == 1);
CHECK_INPUT(start.dim() == 1);
auto rand = torch::rand({start.size(0), walk_length},
start.options().dtype(torch::kFloat));
auto out = torch::empty({start.size(0), walk_length + 1}, start.options());
auto n_out = torch::empty({start.size(0), walk_length + 1}, start.options());
auto e_out = torch::empty({start.size(0), walk_length}, start.options());
auto rowptr_data = rowptr.data_ptr<int64_t>();
auto col_data = col.data_ptr<int64_t>();
auto start_data = start.data_ptr<int64_t>();
auto rand_data = rand.data_ptr<float>();
auto out_data = out.data_ptr<int64_t>();
for (auto n = 0; n < start.size(0); n++) {
auto cur = start_data[n];
auto offset = n * (walk_length + 1);
out_data[offset] = cur;
auto n_out_data = n_out.data_ptr<int64_t>();
auto e_out_data = e_out.data_ptr<int64_t>();
int64_t row_start, row_end, rnd;
for (auto l = 1; l <= walk_length; l++) {
row_start = rowptr_data[cur], row_end = rowptr_data[cur + 1];
if (row_end - row_start == 0) {
cur = n;
} else {
rnd = int64_t(rand_data[n * walk_length + (l - 1)] *
(row_end - row_start));
cur = col_data[row_start + rnd];
}
out_data[offset + l] = cur;
}
if (p == 1. && q == 1.) {
uniform_sampling(rowptr_data, col_data, start_data, n_out_data, e_out_data,
start.numel(), walk_length);
} else {
rejection_sampling(rowptr_data, col_data, start_data, n_out_data,
e_out_data, start.numel(), walk_length, p, q);
}
return out;
return std::make_tuple(n_out, e_out);
}
......@@ -2,6 +2,6 @@
#include <torch/extension.h>
torch::Tensor random_walk_cpu(torch::Tensor rowptr, torch::Tensor col,
torch::Tensor start, int64_t walk_length,
double p, double q);
std::tuple<torch::Tensor, torch::Tensor>
random_walk_cpu(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q);
#include "rw_cuda.h"
#include <ATen/cuda/CUDAContext.h>
#include <curand.h>
#include <curand_kernel.h>
#include "utils.cuh"
#define THREADS 1024
#define BLOCKS(N) (N + THREADS - 1) / THREADS
__global__ void uniform_random_walk_kernel(const int64_t *rowptr,
const int64_t *col,
const int64_t *start,
const float *rand, int64_t *out,
int64_t walk_length, int64_t numel) {
// Uniform random-walk kernel: each thread with index < numel produces one
// walk. Outputs are written step-major: step s of walk w lives at
// n_out[s * numel + w] (s in [0, walk_length]) and e_out[s * numel + w]
// (s in [0, walk_length)). `rand` supplies one pre-drawn float per
// (step, walk) pair and is indexed with the same step-major layout. e_out
// holds the index into `col` of the traversed edge, or -1 when the current
// node has no out-edge (the walk then stays on that node).
__global__ void uniform_sampling_kernel(const int64_t *rowptr,
                                        const int64_t *col,
                                        const int64_t *start, const float *rand,
                                        int64_t *n_out, int64_t *e_out,
                                        const int64_t walk_length,
                                        const int64_t numel) {

  const int64_t walk = blockIdx.x * blockDim.x + threadIdx.x;
  if (walk >= numel)
    return;

  int64_t node = start[walk];
  n_out[walk] = node;

  for (int64_t step = 0; step < walk_length; step++) {
    const int64_t slot = step * numel + walk;
    const int64_t first = rowptr[node];
    const int64_t degree = rowptr[node + 1] - first;

    int64_t edge = -1;
    if (degree != 0) {
      // Scale the uniform draw to an offset inside the neighbor range.
      edge = first + int64_t(rand[slot] * degree);
      node = col[edge];
    }

    n_out[slot + numel] = node;
    e_out[slot] = edge;
  }
}
// node2vec random-walk kernel using rejection sampling: each thread with
// index < numel produces one walk. Outputs are written step-major: step s of
// walk w lives at n_out[s * numel + w] (s in [0, walk_length]) and
// e_out[s * numel + w] (s in [0, walk_length)). e_out holds the index into
// `col` of the traversed edge, or -1 when the current node has no out-edge
// (the walk then stays on that node).
//
// A candidate x proposed from the current node v (previous node t) is
// accepted with probability proportional to 1/p if x == t, 1 if x has an
// edge to t, and 1/q otherwise.
__global__ void
rejection_sampling_kernel(unsigned int seed, const int64_t *rowptr,
                          const int64_t *col, const int64_t *start,
                          int64_t *n_out, int64_t *e_out,
                          const int64_t walk_length, const int64_t numel,
                          const double p, const double q) {

  const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
  if (thread_idx >= numel)
    return;

  // Same seed, distinct subsequence per thread: otherwise every walk would
  // consume an identical stream of random numbers.
  curandState_t state;
  curand_init(seed, thread_idx, 0, &state);

  // Normalize the three unnormalized weights so the largest becomes 1.
  double max_prob = fmax(fmax(1. / p, 1.), 1. / q);
  double prob_0 = 1. / p / max_prob;
  double prob_1 = 1. / max_prob;
  double prob_2 = 1. / q / max_prob;

  // t: previous node, v: current node, x: next node.
  int64_t t = start[thread_idx], v, x, e_cur, row_start, row_end;

  n_out[thread_idx] = t;

  // Without any step to take, the first-step writes below would fall outside
  // both output buffers.
  if (walk_length < 1)
    return;

  // The first step has no predecessor and is therefore sampled uniformly.
  row_start = rowptr[t], row_end = rowptr[t + 1];
  if (row_end - row_start == 0) {
    e_cur = -1;
    v = t;
  } else {
    e_cur = row_start + (curand(&state) % (row_end - row_start));
    v = col[e_cur];
  }
  n_out[numel + thread_idx] = v;
  e_out[thread_idx] = e_cur;

  for (int64_t l = 1; l < walk_length; l++) {
    row_start = rowptr[v], row_end = rowptr[v + 1];

    if (row_end - row_start == 0) {
      // Dead end: stay on v.
      e_cur = -1;
      x = v;
    } else if (row_end - row_start == 1) {
      // Single out-edge: nothing to reject.
      e_cur = row_start;
      x = col[e_cur];
    } else {
      while (true) {
        e_cur = row_start + (curand(&state) % (row_end - row_start));
        x = col[e_cur];

        double r = curand_uniform(&state); // (0, 1]

        // Select the single threshold that applies to this candidate; the
        // cases are mutually exclusive.
        double accept;
        if (x == t) {
          accept = prob_0;
        } else {
          // Scan x's adjacency list with dedicated bounds so that
          // row_start / row_end keep describing v's neighbors on a retry.
          bool is_neighbor = false;
          const int64_t x_end = rowptr[x + 1];
          for (int64_t i = rowptr[x]; i < x_end; i++) {
            if (col[i] == t) {
              is_neighbor = true;
              break;
            }
          }
          accept = is_neighbor ? prob_1 : prob_2;
        }

        if (r < accept)
          break;
      }
    }

    n_out[(l + 1) * numel + thread_idx] = x;
    e_out[l * numel + thread_idx] = e_cur;
    t = v;
    v = x;
  }
}
torch::Tensor random_walk_cuda(torch::Tensor rowptr, torch::Tensor col,
torch::Tensor start, int64_t walk_length,
double p, double q) {
std::tuple<torch::Tensor, torch::Tensor>
random_walk_cuda(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q) {
CHECK_CUDA(rowptr);
CHECK_CUDA(col);
CHECK_CUDA(start);
......@@ -45,15 +127,26 @@ torch::Tensor random_walk_cuda(torch::Tensor rowptr, torch::Tensor col,
CHECK_INPUT(col.dim() == 1);
CHECK_INPUT(start.dim() == 1);
auto rand = torch::rand({start.size(0), walk_length},
start.options().dtype(torch::kFloat));
auto out = torch::empty({walk_length + 1, start.size(0)}, start.options());
auto n_out = torch::empty({walk_length + 1, start.size(0)}, start.options());
auto e_out = torch::empty({walk_length, start.size(0)}, start.options());
auto stream = at::cuda::getCurrentCUDAStream();
uniform_random_walk_kernel<<<BLOCKS(start.numel()), THREADS, 0, stream>>>(
rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
start.data_ptr<int64_t>(), rand.data_ptr<float>(),
out.data_ptr<int64_t>(), walk_length, start.numel());
return out.t().contiguous();
if (p == 1. && q == 1.) {
auto rand = torch::rand({start.size(0), walk_length},
start.options().dtype(torch::kFloat));
uniform_sampling_kernel<<<BLOCKS(start.numel()), THREADS, 0, stream>>>(
rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
start.data_ptr<int64_t>(), rand.data_ptr<float>(),
n_out.data_ptr<int64_t>(), e_out.data_ptr<int64_t>(), walk_length,
start.numel());
} else {
rejection_sampling_kernel<<<BLOCKS(start.numel()), THREADS, 0, stream>>>(
time(NULL), rowptr.data_ptr<int64_t>(), col.data_ptr<int64_t>(),
start.data_ptr<int64_t>(), n_out.data_ptr<int64_t>(),
e_out.data_ptr<int64_t>(), walk_length, start.numel(), p, q);
}
return std::make_tuple(n_out.t().contiguous(), e_out.t().contiguous());
}
......@@ -2,6 +2,6 @@
#include <torch/extension.h>
torch::Tensor random_walk_cuda(torch::Tensor rowptr, torch::Tensor col,
torch::Tensor start, int64_t walk_length,
double p, double q);
std::tuple<torch::Tensor, torch::Tensor>
random_walk_cuda(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q);
......@@ -11,9 +11,9 @@
PyMODINIT_FUNC PyInit__rw(void) { return NULL; }
#endif
torch::Tensor random_walk(torch::Tensor rowptr, torch::Tensor col,
torch::Tensor start, int64_t walk_length, double p,
double q) {
std::tuple<torch::Tensor, torch::Tensor>
random_walk(torch::Tensor rowptr, torch::Tensor col, torch::Tensor start,
int64_t walk_length, double p, double q) {
if (rowptr.device().is_cuda()) {
#ifdef WITH_CUDA
return random_walk_cuda(rowptr, col, start, walk_length, p, q);
......
......@@ -8,16 +8,14 @@ if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "$IDX" = "cu92" ]; then
export CUDA_SHORT=9.2
export CUDA=9.2.148-1
export UBUNTU_VERSION=ubuntu1604
export CUBLAS=cuda-cublas-dev-9-2
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "$IDX" = "cu101" ]; then
export IDX=cu101
export CUDA_SHORT=10.1
export CUDA=10.1.105-1
export CUDA=10.1.243-1
export UBUNTU_VERSION=ubuntu1804
export CUBLAS=libcublas-dev
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
......@@ -26,7 +24,11 @@ if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "$IDX" = "cu102" ]; then
export CUDA_SHORT=10.2
export CUDA=10.2.89-1
export UBUNTU_VERSION=ubuntu1804
export CUBLAS=libcublas-dev
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "$IDX" = "cu110" ]; then
export CUDA_SHORT=11.0
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
......@@ -55,6 +57,13 @@ if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "$IDX" = "cu102" ]; then
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "$IDX" = "cu110" ]; then
export CUDA_SHORT=11.0
export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.2/local_installers
export CUDA_FILE=cuda_${CUDA_SHORT}.2_451.48_win10.exe
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "osx" ] && [ "$IDX" = "cpu" ]; then
export TOOLKIT=""
fi
......@@ -65,14 +74,37 @@ else
export FORCE_CUDA=1
fi
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "${IDX}" != "cpu" ]; then
INSTALLER=cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "${IDX}" != "cpu" ] && [ "${IDX}" != "cu110" ]; then
INSTALLER="cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb"
wget -nv "http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}"
sudo dpkg -i "${INSTALLER}"
wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub"
sudo apt-key add 7fa2af80.pub
sudo apt update -qq
sudo apt install -y "cuda-core-${CUDA_SHORT/./-}" "cuda-cudart-dev-${CUDA_SHORT/./-}" "${CUBLAS}" "cuda-cusparse-dev-${CUDA_SHORT/./-}"
sudo apt install "cuda-core-${CUDA_SHORT/./-}" "cuda-nvcc-${CUDA_SHORT/./-}" "cuda-libraries-dev-${CUDA_SHORT/./-}"
sudo apt clean
CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}
nvcc --version
# Fix cublas on CUDA 10.1:
if [ -d "/usr/local/cuda-10.2/targets/x86_64-linux/include" ]; then
sudo cp -r /usr/local/cuda-10.2/targets/x86_64-linux/include/* "${CUDA_HOME}/include/"
fi
if [ -d "/usr/local/cuda-10.2/targets/x86_64-linux/lib" ]; then
sudo cp -r /usr/local/cuda-10.2/targets/x86_64-linux/lib/* "${CUDA_HOME}/lib/"
fi
fi
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "${IDX}" = "cu110" ]; then
wget -nv https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda-repo-ubuntu1804-11-0-local_11.0.3-450.51.06-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu1804-11-0-local_11.0.3-450.51.06-1_amd64.deb
sudo apt-key add /var/cuda-repo-ubuntu1804-11-0-local/7fa2af80.pub
sudo apt update -qq
sudo apt install cuda-nvcc-11-0 cuda-libraries-dev-11-0
sudo apt clean
CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
......@@ -86,16 +118,11 @@ if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "${IDX}" != "cpu" ]; then
curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip"
7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32"
# Install CUDA
# Install CUDA:
wget -nv "${CUDA_URL}/${CUDA_FILE}"
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT}\" -Wait -NoNewWindow"
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow"
CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v${CUDA_SHORT}
PATH=${CUDA_HOME}/bin:$PATH
PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH
nvcc --version
fi
# Fix Cuda9.2 on Windows: https://github.com/pytorch/pytorch/issues/6109
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "${IDX}" = "cu92" ]; then
sed -i.bak -e '129,141d' "${CUDA_HOME}/include/crt/host_config.h"
fi
......@@ -5,7 +5,7 @@ import glob
import shutil
idx = sys.argv[1]
assert idx in ['cpu', 'cu92', 'cu101', 'cu102']
assert idx in ['cpu', 'cu92', 'cu101', 'cu102', 'cu110']
dist_dir = osp.join(osp.dirname(osp.abspath(__file__)), '..', 'dist')
wheels = glob.glob(osp.join('dist', '**', '*.whl'), recursive=True)
......
#!/bin/bash
# Fix "member may not be initialized" error on Windows: https://github.com/pytorch/pytorch/issues/27958
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "${TORCH_VERSION}" = "1.5.0" ]; then
echo "Fix nvcc for PyTorch 1.5.0"
sed -i.bak -e 's/constexpr/const/g' /c/tools/miniconda3/envs/test/lib/site-packages/torch/include/torch/csrc/jit/api/module.h
sed -i.bak -e 's/constexpr/const/g' /c/tools/miniconda3/envs/test/lib/site-packages/torch/include/torch/csrc/jit/runtime/argument_spec.h
sed -i.bak -e 's/return \*(this->value)/return \*((type\*)this->value)/g' /c/tools/miniconda3/envs/test/lib/site-packages/torch/include/pybind11/cast.h
fi
# https://github.com/pytorch/pytorch/commit/d2e16dd888a9b5fd55bd475d4fcffb70f388d4f0
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "${TORCH_VERSION}" = "1.6.0" ]; then
echo "Fix nvcc for PyTorch 1.6.0"
if [ "${TRAVIS_OS_NAME}" = "windows" ]; then
echo "Fix nvcc for PyTorch"
sed -i.bak -e 's/CONSTEXPR_EXCEPT_WIN_CUDA/const/g' /c/tools/miniconda3/envs/test/lib/site-packages/torch/include/torch/csrc/jit/api/module.h
sed -i.bak -e 's/return \*(this->value)/return \*((type\*)this->value)/g' /c/tools/miniconda3/envs/test/lib/site-packages/torch/include/pybind11/cast.h
fi
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "${TORCH_VERSION}" = "1.7.0" ]; then
echo "Fix nvcc for PyTorch 1.7.0"
sed -i.bak '/static constexpr Symbol Kind/d' /c/tools/miniconda3/envs/test/lib/site-packages/torch/include/torch/csrc/jit/ir/ir.h
fi
import os
import os.path as osp
import sys
import glob
from setuptools import setup, find_packages
import torch
from torch.__config__ import parallel_info
from torch.utils.cpp_extension import BuildExtension
from torch.utils.cpp_extension import CppExtension, CUDAExtension, CUDA_HOME
......@@ -20,6 +22,17 @@ def get_extensions():
Extension = CppExtension
define_macros = []
extra_compile_args = {'cxx': []}
extra_link_args = []
info = parallel_info()
if 'parallel backend: OpenMP' in info and 'OpenMP not found' not in info:
extra_compile_args['cxx'] += ['-DAT_PARALLEL_OPENMP']
if sys.platform == 'win32':
extra_compile_args['cxx'] += ['/openmp']
else:
extra_compile_args['cxx'] += ['-fopenmp']
else:
print('Compiling without OpenMP...')
if WITH_CUDA:
Extension = CUDAExtension
......@@ -51,6 +64,7 @@ def get_extensions():
include_dirs=[extensions_dir],
define_macros=define_macros,
extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args,
)
extensions += [extension]
......@@ -63,7 +77,7 @@ tests_require = ['pytest', 'pytest-cov', 'scipy']
setup(
name='torch_cluster',
version='1.5.7',
version='1.5.8',
author='Matthias Fey',
author_email='matthias.fey@tu-dortmund.de',
url='https://github.com/rusty1s/pytorch_cluster',
......
......@@ -3,7 +3,7 @@ import os.path as osp
import torch
__version__ = '1.5.7'
__version__ = '1.5.8'
for library in [
'_version', '_grid', '_graclus', '_fps', '_rw', '_sampler', '_nearest',
......
......@@ -38,6 +38,7 @@ def nearest(x: torch.Tensor, y: torch.Tensor,
x = x.view(-1, 1) if x.dim() == 1 else x
y = y.view(-1, 1) if y.dim() == 1 else y
assert x.size(1) == y.size(1)
if x.is_cuda:
if batch_x is not None:
......@@ -66,29 +67,24 @@ def nearest(x: torch.Tensor, y: torch.Tensor,
return torch.ops.torch_cluster.nearest(x, y, ptr_x, ptr_y)
else:
if batch_x is None:
batch_x = x.new_zeros(x.size(0), dtype=torch.long)
if batch_y is None:
batch_y = y.new_zeros(y.size(0), dtype=torch.long)
assert x.dim() == 2 and batch_x.dim() == 1
assert y.dim() == 2 and batch_y.dim() == 1
assert x.size(1) == y.size(1)
assert x.size(0) == batch_x.size(0)
assert y.size(0) == batch_y.size(0)
# Translate and rescale x and y to [0, 1].
min_xy = min(x.min().item(), y.min().item())
x, y = x - min_xy, y - min_xy
max_xy = max(x.max().item(), y.max().item())
x.div_(max_xy)
y.div_(max_xy)
# Concat batch/features to ensure no cross-links between examples.
x = torch.cat([x, 2 * x.size(1) * batch_x.view(-1, 1).to(x.dtype)], -1)
y = torch.cat([y, 2 * y.size(1) * batch_y.view(-1, 1).to(y.dtype)], -1)
if batch_x is not None and batch_y is not None:
assert x.dim() == 2 and batch_x.dim() == 1
assert y.dim() == 2 and batch_y.dim() == 1
assert x.size(0) == batch_x.size(0)
assert y.size(0) == batch_y.size(0)
min_xy = min(x.min().item(), y.min().item())
x, y = x - min_xy, y - min_xy
max_xy = max(x.max().item(), y.max().item())
x.div_(max_xy)
y.div_(max_xy)
# Concat batch/features to ensure no cross-links between examples.
D = x.size(-1)
x = torch.cat([x, 2 * D * batch_x.view(-1, 1).to(x.dtype)], -1)
y = torch.cat([y, 2 * D * batch_y.view(-1, 1).to(y.dtype)], -1)
return torch.from_numpy(
scipy.cluster.vq.vq(x.detach().cpu(),
......
import warnings
from typing import Optional
import torch
from torch import Tensor
@torch.jit.script
def random_walk(row: torch.Tensor, col: torch.Tensor, start: torch.Tensor,
walk_length: int, p: float = 1, q: float = 1,
coalesced: bool = True, num_nodes: Optional[int] = None):
def random_walk(row: Tensor, col: Tensor, start: Tensor, walk_length: int,
p: float = 1, q: float = 1, coalesced: bool = True,
num_nodes: Optional[int] = None) -> Tensor:
"""Samples random walks of length :obj:`walk_length` from all node indices
in :obj:`start` in the graph given by :obj:`(row, col)` as described in the
`"node2vec: Scalable Feature Learning for Networks"
......@@ -43,10 +43,5 @@ def random_walk(row: torch.Tensor, col: torch.Tensor, start: torch.Tensor,
rowptr = row.new_zeros(num_nodes + 1)
torch.cumsum(deg, 0, out=rowptr[1:])
if p != 1. or q != 1.: # pragma: no cover
warnings.warn('Parameters `p` and `q` are not supported yet and will'
'be restored to their default values `p=1` and `q=1`.')
p = q = 1.
return torch.ops.torch_cluster.random_walk(rowptr, col, start, walk_length,
p, q)
p, q)[0]
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment