".github/scripts/unittest-linux/run_test.sh" did not exist on "6bc591ec7995481a483079a8b71f5c923e8e0ace"
Commit b73cc994 authored by quyuanhao123's avatar quyuanhao123
Browse files

Initial commit

parents
Pipeline #193 failed with stages
in 0 seconds
#pragma once
#include <torch/extension.h>
std::tuple<torch::Tensor, torch::Tensor>
spline_basis_fw_cuda(torch::Tensor pseudo, torch::Tensor kernel_size,
torch::Tensor is_open_spline, int64_t degree);
torch::Tensor spline_basis_bw_cuda(torch::Tensor grad_basis,
torch::Tensor pseudo,
torch::Tensor kernel_size,
torch::Tensor is_open_spline,
int64_t degree);
#include "hip/hip_runtime.h"
#include "basis_hip.h"
#include <ATen/hip/HIPContext.h>
#include "utils.cuh"
#define THREADS 1024
#define BLOCKS(N) (N + THREADS - 1) / THREADS
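// Basis<scalar_t, degree>: evaluates the k_mod-th segment of the B-spline
// basis of the given degree (1, 2 or 3) at the local coordinate v in [0, 1);
// backward() returns the derivative of that segment with respect to v.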
template <typename scalar_t, int64_t degree> struct Basis {
static inline __device__ scalar_t forward(scalar_t v, int64_t k_mod) {
if (degree == 1) {
return 1. - v - k_mod + 2. * v * k_mod;
} else if (degree == 2) {
if (k_mod == 0)
return 0.5 * v * v - v + 0.5;
else if (k_mod == 1)
return -v * v + v + 0.5;
else
return 0.5 * v * v;
} else if (degree == 3) {
if (k_mod == 0)
return (1. - v) * (1. - v) * (1. - v) / 6.;
else if (k_mod == 1)
return (3. * v * v * v - 6. * v * v + 4.) / 6.;
else if (k_mod == 2)
return (-3. * v * v * v + 3. * v * v + 3. * v + 1.) / 6.;
else
return v * v * v / 6.;
} else {
return (scalar_t)-1.;
}
}
static inline __device__ scalar_t backward(scalar_t v, int64_t k_mod) {
if (degree == 1) {
return 2 * k_mod - 1;
} else if (degree == 2) {
if (k_mod == 0)
return v - 1.;
else if (k_mod == 1)
return -2. * v + 1.;
else
return v;
} else if (degree == 3) {
if (k_mod == 0)
return (-v * v + 2. * v - 1.) / 2.;
else if (k_mod == 1)
return (3. * v * v - 4. * v) / 2.;
else if (k_mod == 2)
return (-3. * v * v + 2. * v + 1.) / 2.;
else
return v * v / 2.;
} else {
return (scalar_t)-1.;
}
}
};
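// Forward kernel: one thread per (edge e, basis product s). Each thread walks
// over the D pseudo-coordinate dimensions, multiplying the per-dimension basis
// values into basis[e, s] and accumulating the flattened index of the
// corresponding kernel weight into weight_index[e, s].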
template <typename scalar_t, int64_t degree>
__global__ void
spline_basis_fw_kernel(const scalar_t *pseudo, const int64_t *kernel_size,
const uint8_t *is_open_spline, scalar_t *basis,
int64_t *weight_index, int64_t E, int64_t D, int64_t S,
int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / S;
const int64_t s = thread_idx % S;
if (thread_idx < numel) {
int64_t k = s, wi = 0, wi_offset = 1;
scalar_t b = (scalar_t)1.;
for (int64_t d = 0; d < D; d++) {
const int64_t k_mod = k % (degree + 1);
k /= degree + 1;
scalar_t v = pseudo[e * D + d];
v *= kernel_size[d] - degree * is_open_spline[d];
wi += (((int64_t)v + k_mod) % kernel_size[d]) * wi_offset;
wi_offset *= kernel_size[d];
v -= floor(v);
v = Basis<scalar_t, degree>::forward(v, k_mod);
b *= v;
}
basis[thread_idx] = b;
weight_index[thread_idx] = wi;
}
}
std::tuple<torch::Tensor, torch::Tensor>
spline_basis_fw_cuda(torch::Tensor pseudo, torch::Tensor kernel_size,
torch::Tensor is_open_spline, int64_t degree) {
CHECK_CUDA(pseudo);
CHECK_CUDA(kernel_size);
CHECK_CUDA(is_open_spline);
hipSetDevice(pseudo.get_device());
CHECK_INPUT(kernel_size.dim() == 1);
CHECK_INPUT(pseudo.size(1) == kernel_size.numel());
CHECK_INPUT(is_open_spline.dim() == 1);
CHECK_INPUT(pseudo.size(1) == is_open_spline.numel());
auto E = pseudo.size(0);
auto D = pseudo.size(1);
auto S = (int64_t)(powf(degree + 1, D) + 0.5);
auto basis = at::empty({E, S}, pseudo.options());
auto weight_index = at::empty({E, S}, kernel_size.options());
auto kernel_size_data = kernel_size.data_ptr<int64_t>();
auto is_open_spline_data = is_open_spline.data_ptr<uint8_t>();
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(pseudo.scalar_type(), "basis_fw", [&] {
auto pseudo_data = pseudo.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
AT_DISPATCH_DEGREE_TYPES(degree, [&] {
spline_basis_fw_kernel<scalar_t, DEGREE>
<<<BLOCKS(basis.numel()), THREADS, 0, stream>>>(
pseudo_data, kernel_size_data, is_open_spline_data, basis_data,
weight_index_data, E, D, S, basis.numel());
});
});
return std::make_tuple(basis, weight_index);
}
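// Backward kernel: one thread per (edge e, dimension d). For each of the S
// basis products it multiplies the basis derivative in dimension d with the
// forward basis values of all other dimensions (chain rule), then rescales by
// the same factor that was applied to the pseudo coordinate in the forward pass.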
template <typename scalar_t, int64_t degree>
__global__ void
spline_basis_bw_kernel(const scalar_t *grad_basis, const scalar_t *pseudo,
const int64_t *kernel_size,
const uint8_t *is_open_spline, scalar_t *grad_pseudo,
int64_t E, int64_t D, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / D;
const int64_t d = thread_idx % D;
if (thread_idx < numel) {
scalar_t g = (scalar_t)0., tmp;
for (ptrdiff_t s = 0; s < S; s++) {
int64_t k_mod = (s / (int64_t)(powf(degree + 1, d) + 0.5)) % (degree + 1);
scalar_t v = pseudo[e * D + d];
v *= kernel_size[d] - degree * is_open_spline[d];
v -= floor(v);
v = Basis<scalar_t, degree>::backward(v, k_mod);
tmp = v;
for (int64_t d_it = 1; d_it < D; d_it++) {
const int64_t d_new = d_it - (d >= d_it);
k_mod = (s / (int64_t)(powf(degree + 1, d_new) + 0.5)) % (degree + 1);
v = pseudo[e * D + d_new];
v *= kernel_size[d_new] - degree * is_open_spline[d_new];
v -= floor(v);
v = Basis<scalar_t, degree>::forward(v, k_mod);
tmp *= v;
}
g += tmp * grad_basis[e * S + s];
}
g *= kernel_size[d] - degree * is_open_spline[d];
grad_pseudo[thread_idx] = g;
}
}
torch::Tensor spline_basis_bw_cuda(torch::Tensor grad_basis,
torch::Tensor pseudo,
torch::Tensor kernel_size,
torch::Tensor is_open_spline,
int64_t degree) {
CHECK_CUDA(grad_basis);
CHECK_CUDA(pseudo);
CHECK_CUDA(kernel_size);
CHECK_CUDA(is_open_spline);
hipSetDevice(grad_basis.get_device());
CHECK_INPUT(grad_basis.size(0) == pseudo.size(0));
CHECK_INPUT(kernel_size.dim() == 1);
CHECK_INPUT(pseudo.size(1) == kernel_size.numel());
CHECK_INPUT(is_open_spline.dim() == 1);
CHECK_INPUT(pseudo.size(1) == is_open_spline.numel());
auto E = pseudo.size(0);
auto D = pseudo.size(1);
auto S = grad_basis.size(1);
auto grad_pseudo = at::empty({E, D}, pseudo.options());
auto kernel_size_data = kernel_size.data_ptr<int64_t>();
auto is_open_spline_data = is_open_spline.data_ptr<uint8_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(pseudo.scalar_type(), "basis_bw", [&] {
auto grad_basis_data = grad_basis.data_ptr<scalar_t>();
auto pseudo_data = pseudo.data_ptr<scalar_t>();
auto grad_pseudo_data = grad_pseudo.data_ptr<scalar_t>();
AT_DISPATCH_DEGREE_TYPES(degree, [&] {
spline_basis_bw_kernel<scalar_t, DEGREE>
<<<BLOCKS(grad_pseudo.numel()), THREADS, 0, stream>>>(
grad_basis_data, pseudo_data, kernel_size_data,
is_open_spline_data, grad_pseudo_data, E, D, S,
grad_pseudo.numel());
});
});
return grad_pseudo;
}
#include "hip/hip_runtime.h"
#include "basis_hip.h"
#include <ATen/hip/HIPContext.h>
#include "utils.cuh"
#define THREADS 1024
#define BLOCKS(N) (N + THREADS - 1) / THREADS
template <typename scalar_t, int64_t degree> struct Basis {
static inline __device__ scalar_t forward(scalar_t v, int64_t k_mod) {
if (degree == 1) {
return 1. - v - k_mod + 2. * v * k_mod;
} else if (degree == 2) {
if (k_mod == 0)
return 0.5 * v * v - v + 0.5;
else if (k_mod == 1)
return -v * v + v + 0.5;
else
return 0.5 * v * v;
} else if (degree == 3) {
if (k_mod == 0)
return (1. - v) * (1. - v) * (1. - v) / 6.;
else if (k_mod == 1)
return (3. * v * v * v - 6. * v * v + 4.) / 6.;
else if (k_mod == 2)
return (-3. * v * v * v + 3. * v * v + 3. * v + 1.) / 6.;
else
return v * v * v / 6.;
} else {
return (scalar_t)-1.;
}
}
static inline __device__ scalar_t backward(scalar_t v, int64_t k_mod) {
if (degree == 1) {
return 2 * k_mod - 1;
} else if (degree == 2) {
if (k_mod == 0)
return v - 1.;
else if (k_mod == 1)
return -2. * v + 1.;
else
return v;
} else if (degree == 3) {
if (k_mod == 0)
return (-v * v + 2. * v - 1.) / 2.;
else if (k_mod == 1)
return (3. * v * v - 4. * v) / 2.;
else if (k_mod == 2)
return (-3. * v * v + 2. * v + 1.) / 2.;
else
return v * v / 2.;
} else {
return (scalar_t)-1.;
}
}
};
template <typename scalar_t, int64_t degree>
__global__ void
spline_basis_fw_kernel(const scalar_t *pseudo, const int64_t *kernel_size,
const uint8_t *is_open_spline, scalar_t *basis,
int64_t *weight_index, int64_t E, int64_t D, int64_t S,
int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / S;
const int64_t s = thread_idx % S;
if (thread_idx < numel) {
int64_t k = s, wi = 0, wi_offset = 1;
scalar_t b = (scalar_t)1.;
for (int64_t d = 0; d < D; d++) {
const int64_t k_mod = k % (degree + 1);
k /= degree + 1;
scalar_t v = pseudo[e * D + d];
v *= kernel_size[d] - degree * is_open_spline[d];
wi += (((int64_t)v + k_mod) % kernel_size[d]) * wi_offset;
wi_offset *= kernel_size[d];
v -= floor(v);
v = Basis<scalar_t, degree>::forward(v, k_mod);
b *= v;
}
basis[thread_idx] = b;
weight_index[thread_idx] = wi;
}
}
std::tuple<torch::Tensor, torch::Tensor>
spline_basis_fw_cuda(torch::Tensor pseudo, torch::Tensor kernel_size,
torch::Tensor is_open_spline, int64_t degree) {
CHECK_CUDA(pseudo);
CHECK_CUDA(kernel_size);
CHECK_CUDA(is_open_spline);
hipSetDevice(pseudo.get_device());
CHECK_INPUT(kernel_size.dim() == 1);
CHECK_INPUT(pseudo.size(1) == kernel_size.numel());
CHECK_INPUT(is_open_spline.dim() == 1);
CHECK_INPUT(pseudo.size(1) == is_open_spline.numel());
auto E = pseudo.size(0);
auto D = pseudo.size(1);
auto S = (int64_t)(powf(degree + 1, D) + 0.5);
auto basis = at::empty({E, S}, pseudo.options());
auto weight_index = at::empty({E, S}, kernel_size.options());
auto kernel_size_data = kernel_size.data_ptr<int64_t>();
auto is_open_spline_data = is_open_spline.data_ptr<uint8_t>();
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::hip::getCurrentHIPStreamMasqueradingAsCUDA();
AT_DISPATCH_FLOATING_TYPES(pseudo.scalar_type(), "basis_fw", [&] {
auto pseudo_data = pseudo.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
AT_DISPATCH_DEGREE_TYPES(degree, [&] {
hipLaunchKernelGGL(( spline_basis_fw_kernel<scalar_t, DEGREE>)
, dim3(BLOCKS(basis.numel())), dim3(THREADS), 0, stream,
pseudo_data, kernel_size_data, is_open_spline_data, basis_data,
weight_index_data, E, D, S, basis.numel());
});
});
return std::make_tuple(basis, weight_index);
}
template <typename scalar_t, int64_t degree>
__global__ void
spline_basis_bw_kernel(const scalar_t *grad_basis, const scalar_t *pseudo,
const int64_t *kernel_size,
const uint8_t *is_open_spline, scalar_t *grad_pseudo,
int64_t E, int64_t D, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / D;
const int64_t d = thread_idx % D;
if (thread_idx < numel) {
scalar_t g = (scalar_t)0., tmp;
for (ptrdiff_t s = 0; s < S; s++) {
int64_t k_mod = (s / (int64_t)(powf(degree + 1, d) + 0.5)) % (degree + 1);
scalar_t v = pseudo[e * D + d];
v *= kernel_size[d] - degree * is_open_spline[d];
v -= floor(v);
v = Basis<scalar_t, degree>::backward(v, k_mod);
tmp = v;
for (int64_t d_it = 1; d_it < D; d_it++) {
const int64_t d_new = d_it - (d >= d_it);
k_mod = (s / (int64_t)(powf(degree + 1, d_new) + 0.5)) % (degree + 1);
v = pseudo[e * D + d_new];
v *= kernel_size[d_new] - degree * is_open_spline[d_new];
v -= floor(v);
v = Basis<scalar_t, degree>::forward(v, k_mod);
tmp *= v;
}
g += tmp * grad_basis[e * S + s];
}
g *= kernel_size[d] - degree * is_open_spline[d];
grad_pseudo[thread_idx] = g;
}
}
torch::Tensor spline_basis_bw_cuda(torch::Tensor grad_basis,
torch::Tensor pseudo,
torch::Tensor kernel_size,
torch::Tensor is_open_spline,
int64_t degree) {
CHECK_CUDA(grad_basis);
CHECK_CUDA(pseudo);
CHECK_CUDA(kernel_size);
CHECK_CUDA(is_open_spline);
hipSetDevice(grad_basis.get_device());
CHECK_INPUT(grad_basis.size(0) == pseudo.size(0));
CHECK_INPUT(kernel_size.dim() == 1);
CHECK_INPUT(pseudo.size(1) == kernel_size.numel());
CHECK_INPUT(is_open_spline.dim() == 1);
CHECK_INPUT(pseudo.size(1) == is_open_spline.numel());
auto E = pseudo.size(0);
auto D = pseudo.size(1);
auto S = grad_basis.size(1);
auto grad_pseudo = at::empty({E, D}, pseudo.options());
auto kernel_size_data = kernel_size.data_ptr<int64_t>();
auto is_open_spline_data = is_open_spline.data_ptr<uint8_t>();
auto stream = at::hip::getCurrentHIPStreamMasqueradingAsCUDA();
AT_DISPATCH_FLOATING_TYPES(pseudo.scalar_type(), "basis_bw", [&] {
auto grad_basis_data = grad_basis.data_ptr<scalar_t>();
auto pseudo_data = pseudo.data_ptr<scalar_t>();
auto grad_pseudo_data = grad_pseudo.data_ptr<scalar_t>();
AT_DISPATCH_DEGREE_TYPES(degree, [&] {
hipLaunchKernelGGL(( spline_basis_bw_kernel<scalar_t, DEGREE>)
, dim3(BLOCKS(grad_pseudo.numel())), dim3(THREADS), 0, stream,
grad_basis_data, pseudo_data, kernel_size_data,
is_open_spline_data, grad_pseudo_data, E, D, S,
grad_pseudo.numel());
});
});
return grad_pseudo;
}
#pragma once
#include <torch/extension.h>
#define CHECK_CUDA(x) \
AT_ASSERTM(x.device().is_cuda(), #x " must be CUDA tensor")
#define CHECK_INPUT(x) AT_ASSERTM(x, "Input mismatch")
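// Dispatches a runtime spline degree (1, 2 or 3) to the compile-time constant
// DEGREE so that Basis<scalar_t, DEGREE> can be instantiated per degree.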
#define AT_DISPATCH_DEGREE_TYPES(degree, ...) \
[&] { \
switch (degree) { \
case 1: { \
const int64_t DEGREE = 1; \
return __VA_ARGS__(); \
} \
case 2: { \
const int64_t DEGREE = 2; \
return __VA_ARGS__(); \
} \
case 3: { \
const int64_t DEGREE = 3; \
return __VA_ARGS__(); \
} \
default: \
AT_ERROR("Basis degree not implemented"); \
} \
}()
#pragma once
#include <torch/extension.h>
torch::Tensor spline_weighting_fw_cuda(torch::Tensor x, torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index);
torch::Tensor spline_weighting_bw_x_cuda(torch::Tensor grad_out,
torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index);
torch::Tensor spline_weighting_bw_weight_cuda(torch::Tensor grad_out,
torch::Tensor x,
torch::Tensor basis,
torch::Tensor weight_index,
int64_t kernel_size);
torch::Tensor spline_weighting_bw_basis_cuda(torch::Tensor grad_out,
torch::Tensor x,
torch::Tensor weight,
torch::Tensor weight_index);
#include "hip/hip_runtime.h"
#include "weighting_hip.h"
#include <ATen/hip/HIPContext.h>
#include "atomics.cuh"
#include "utils.cuh"
#define THREADS 1024
#define BLOCKS(N) (N + THREADS - 1) / THREADS
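// Forward kernel: one thread per output entry (e, m_out). Accumulates, over
// the S active basis terms and M_in input channels,
// basis[e, s] * x[e, m_in] * weight[weight_index[e, s], m_in, m_out].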
template <typename scalar_t>
__global__ void
spline_weighting_fw_kernel(const scalar_t *x, const scalar_t *weight,
const scalar_t *basis, const int64_t *weight_index,
scalar_t *out, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_out;
const int64_t m_out = thread_idx % M_out;
if (thread_idx < numel) {
scalar_t v = (scalar_t)0.;
for (ptrdiff_t s = 0; s < S; s++) {
const scalar_t b = basis[e * S + s];
const int64_t wi = weight_index[e * S + s];
for (int64_t m_in = 0; m_in < M_in; m_in++) {
scalar_t tmp = weight[wi * M_in * M_out + m_in * M_out + m_out];
tmp *= b * x[e * M_in + m_in];
v += tmp;
}
}
out[thread_idx] = v;
}
}
torch::Tensor spline_weighting_fw_cuda(torch::Tensor x, torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index) {
CHECK_CUDA(x);
CHECK_CUDA(weight);
CHECK_CUDA(basis);
CHECK_CUDA(weight_index);
hipSetDevice(x.get_device());
CHECK_INPUT(x.size(1) == weight.size(1));
auto E = x.size(0);
auto M_in = x.size(1);
auto M_out = weight.size(2);
auto S = basis.size(1);
auto out = at::empty({E, M_out}, x.options());
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "weighting_fw", [&] {
auto x_data = x.data_ptr<scalar_t>();
auto weight_data = weight.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
auto out_data = out.data_ptr<scalar_t>();
spline_weighting_fw_kernel<scalar_t>
<<<BLOCKS(out.numel()), THREADS, 0, stream>>>(
x_data, weight_data, basis_data, weight_index_data, out_data, E,
M_in, M_out, S, out.numel());
});
return out;
}
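// Backward w.r.t. x: one thread per (e, m_in). The caller passes weight
// transposed to [K, M_out, M_in] so that consecutive threads (consecutive
// m_in) read consecutive weight entries (coalesced access).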
template <typename scalar_t>
__global__ void
spline_weighting_bw_x_kernel(const scalar_t *grad_out, const scalar_t *weight,
const scalar_t *basis, const int64_t *weight_index,
scalar_t *grad_x, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_in;
const int64_t m_in = thread_idx % M_in;
if (thread_idx < numel) {
scalar_t v = (scalar_t)0.;
for (int64_t s = 0; s < S; s++) {
const scalar_t b = basis[e * S + s];
const int64_t wi = weight_index[e * S + s];
for (int64_t m_out = 0; m_out < M_out; m_out++) {
scalar_t tmp = weight[wi * M_out * M_in + m_out * M_in + m_in];
tmp *= b * grad_out[e * M_out + m_out];
v += tmp;
}
}
grad_x[thread_idx] = v;
}
}
torch::Tensor spline_weighting_bw_x_cuda(torch::Tensor grad_out,
torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index) {
CHECK_CUDA(grad_out);
CHECK_CUDA(weight);
CHECK_CUDA(basis);
CHECK_CUDA(weight_index);
hipSetDevice(grad_out.get_device());
CHECK_INPUT(grad_out.size(1) == weight.size(2));
auto E = grad_out.size(0);
auto M_in = weight.size(1);
auto M_out = grad_out.size(1);
auto S = basis.size(1);
auto grad_x = at::zeros({E, M_in}, grad_out.options());
weight = weight.transpose(1, 2).contiguous(); // Contiguous memory-access.
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(grad_out.scalar_type(), "weighting_bw_x", [&] {
auto grad_out_data = grad_out.data_ptr<scalar_t>();
auto weight_data = weight.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
auto grad_x_data = grad_x.data_ptr<scalar_t>();
spline_weighting_bw_x_kernel<scalar_t>
<<<BLOCKS(grad_x.numel()), THREADS, 0, stream>>>(
grad_out_data, weight_data, basis_data, weight_index_data,
grad_x_data, E, M_in, M_out, S, grad_x.numel());
});
return grad_x;
}
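// Backward w.r.t. weight: one thread per (e, m_out). Scatters contributions
// into grad_weight with atomAdd, since several edges can reference the same
// kernel weight.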
template <typename scalar_t>
__global__ void spline_weighting_bw_weight_kernel(
const scalar_t *grad_out, const scalar_t *x, const scalar_t *basis,
const int64_t *weight_index, scalar_t *grad_weight, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_out;
const int64_t m_out = thread_idx % M_out;
if (thread_idx < numel) {
auto g = grad_out[e * M_out + m_out];
for (int64_t s = 0; s < S; s++) {
const scalar_t b = basis[e * S + s];
const int64_t wi = weight_index[e * S + s];
for (int64_t m_in = 0; m_in < M_in; m_in++) {
auto v = g * b * x[e * M_in + m_in];
atomAdd(&grad_weight[wi * M_in * M_out + m_in * M_out + m_out], v);
}
}
}
}
torch::Tensor spline_weighting_bw_weight_cuda(torch::Tensor grad_out,
torch::Tensor x,
torch::Tensor basis,
torch::Tensor weight_index,
int64_t kernel_size) {
CHECK_CUDA(grad_out);
CHECK_CUDA(x);
CHECK_CUDA(basis);
CHECK_CUDA(weight_index);
hipSetDevice(grad_out.get_device());
auto E = grad_out.size(0);
auto M_in = x.size(1);
auto M_out = grad_out.size(1);
auto S = basis.size(1);
auto grad_weight = at::zeros({kernel_size, M_in, M_out}, grad_out.options());
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "weighting_bw_weight", [&] {
auto grad_out_data = grad_out.data_ptr<scalar_t>();
auto x_data = x.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
auto grad_weight_data = grad_weight.data_ptr<scalar_t>();
spline_weighting_bw_weight_kernel<scalar_t>
<<<BLOCKS(grad_out.numel()), THREADS, 0, stream>>>(
grad_out_data, x_data, basis_data, weight_index_data,
grad_weight_data, E, M_in, M_out, S, grad_out.numel());
});
return grad_weight;
}
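// Backward w.r.t. basis: one thread per (e, m_out); partial sums over the
// input channels are combined across m_out threads with atomAdd into
// grad_basis[e, s].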
template <typename scalar_t>
__global__ void spline_weighting_bw_basis_kernel(
const scalar_t *grad_out, const scalar_t *x, const scalar_t *weight,
const int64_t *weight_index, scalar_t *grad_basis, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const size_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_out;
const int64_t m_out = thread_idx % M_out;
if (thread_idx < numel) {
const scalar_t g = grad_out[e * M_out + m_out];
for (int64_t s = 0; s < S; s++) {
scalar_t v = (scalar_t)0.;
const int64_t wi = weight_index[e * S + s];
for (int64_t m_in = 0; m_in < M_in; m_in++) {
const scalar_t w = weight[wi * M_in * M_out + m_in * M_out + m_out];
v += g * w * x[e * M_in + m_in];
}
atomAdd(&grad_basis[e * S + s], v);
}
}
}
torch::Tensor spline_weighting_bw_basis_cuda(torch::Tensor grad_out,
torch::Tensor x,
torch::Tensor weight,
torch::Tensor weight_index) {
CHECK_CUDA(grad_out);
CHECK_CUDA(x);
CHECK_CUDA(weight);
CHECK_CUDA(weight_index);
hipSetDevice(grad_out.get_device());
CHECK_INPUT(x.size(1) == weight.size(1));
CHECK_INPUT(grad_out.size(1) == weight.size(2));
auto E = grad_out.size(0);
auto M_in = x.size(1);
auto M_out = grad_out.size(1);
auto S = weight_index.size(1);
auto grad_basis = at::zeros({E, S}, grad_out.options());
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "weighting_bw_basis", [&] {
auto grad_out_data = grad_out.data_ptr<scalar_t>();
auto x_data = x.data_ptr<scalar_t>();
auto weight_data = weight.data_ptr<scalar_t>();
auto grad_basis_data = grad_basis.data_ptr<scalar_t>();
spline_weighting_bw_basis_kernel<scalar_t>
<<<BLOCKS(grad_out.numel()), THREADS, 0, stream>>>(
grad_out_data, x_data, weight_data, weight_index_data,
grad_basis_data, E, M_in, M_out, S, grad_out.numel());
});
return grad_basis;
}
#include "hip/hip_runtime.h"
#include "weighting_hip.h"
#include <ATen/hip/HIPContext.h>
#include "atomics.cuh"
#include "utils.cuh"
#define THREADS 1024
#define BLOCKS(N) (N + THREADS - 1) / THREADS
template <typename scalar_t>
__global__ void
spline_weighting_fw_kernel(const scalar_t *x, const scalar_t *weight,
const scalar_t *basis, const int64_t *weight_index,
scalar_t *out, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_out;
const int64_t m_out = thread_idx % M_out;
if (thread_idx < numel) {
scalar_t v = (scalar_t)0.;
for (ptrdiff_t s = 0; s < S; s++) {
const scalar_t b = basis[e * S + s];
const int64_t wi = weight_index[e * S + s];
for (int64_t m_in = 0; m_in < M_in; m_in++) {
scalar_t tmp = weight[wi * M_in * M_out + m_in * M_out + m_out];
tmp *= b * x[e * M_in + m_in];
v += tmp;
}
}
out[thread_idx] = v;
}
}
torch::Tensor spline_weighting_fw_cuda(torch::Tensor x, torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index) {
CHECK_CUDA(x);
CHECK_CUDA(weight);
CHECK_CUDA(basis);
CHECK_CUDA(weight_index);
hipSetDevice(x.get_device());
CHECK_INPUT(x.size(1) == weight.size(1));
auto E = x.size(0);
auto M_in = x.size(1);
auto M_out = weight.size(2);
auto S = basis.size(1);
auto out = at::empty({E, M_out}, x.options());
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::hip::getCurrentHIPStreamMasqueradingAsCUDA();
AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "weighting_fw", [&] {
auto x_data = x.data_ptr<scalar_t>();
auto weight_data = weight.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
auto out_data = out.data_ptr<scalar_t>();
hipLaunchKernelGGL(( spline_weighting_fw_kernel<scalar_t>)
, dim3(BLOCKS(out.numel())), dim3(THREADS), 0, stream,
x_data, weight_data, basis_data, weight_index_data, out_data, E,
M_in, M_out, S, out.numel());
});
return out;
}
template <typename scalar_t>
__global__ void
spline_weighting_bw_x_kernel(const scalar_t *grad_out, const scalar_t *weight,
const scalar_t *basis, const int64_t *weight_index,
scalar_t *grad_x, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_in;
const int64_t m_in = thread_idx % M_in;
if (thread_idx < numel) {
scalar_t v = (scalar_t)0.;
for (int64_t s = 0; s < S; s++) {
const scalar_t b = basis[e * S + s];
const int64_t wi = weight_index[e * S + s];
for (int64_t m_out = 0; m_out < M_out; m_out++) {
scalar_t tmp = weight[wi * M_out * M_in + m_out * M_in + m_in];
tmp *= b * grad_out[e * M_out + m_out];
v += tmp;
}
}
grad_x[thread_idx] = v;
}
}
torch::Tensor spline_weighting_bw_x_cuda(torch::Tensor grad_out,
torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index) {
CHECK_CUDA(grad_out);
CHECK_CUDA(weight);
CHECK_CUDA(basis);
CHECK_CUDA(weight_index);
hipSetDevice(grad_out.get_device());
CHECK_INPUT(grad_out.size(1) == weight.size(2));
auto E = grad_out.size(0);
auto M_in = weight.size(1);
auto M_out = grad_out.size(1);
auto S = basis.size(1);
auto grad_x = at::zeros({E, M_in}, grad_out.options());
weight = weight.transpose(1, 2).contiguous(); // Contiguous memory-access.
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::hip::getCurrentHIPStreamMasqueradingAsCUDA();
AT_DISPATCH_FLOATING_TYPES(grad_out.scalar_type(), "weighting_bw_x", [&] {
auto grad_out_data = grad_out.data_ptr<scalar_t>();
auto weight_data = weight.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
auto grad_x_data = grad_x.data_ptr<scalar_t>();
hipLaunchKernelGGL(( spline_weighting_bw_x_kernel<scalar_t>)
, dim3(BLOCKS(grad_x.numel())), dim3(THREADS), 0, stream,
grad_out_data, weight_data, basis_data, weight_index_data,
grad_x_data, E, M_in, M_out, S, grad_x.numel());
});
return grad_x;
}
template <typename scalar_t>
__global__ void spline_weighting_bw_weight_kernel(
const scalar_t *grad_out, const scalar_t *x, const scalar_t *basis,
const int64_t *weight_index, scalar_t *grad_weight, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_out;
const int64_t m_out = thread_idx % M_out;
if (thread_idx < numel) {
auto g = grad_out[e * M_out + m_out];
for (int64_t s = 0; s < S; s++) {
const scalar_t b = basis[e * S + s];
const int64_t wi = weight_index[e * S + s];
for (int64_t m_in = 0; m_in < M_in; m_in++) {
auto v = g * b * x[e * M_in + m_in];
atomAdd(&grad_weight[wi * M_in * M_out + m_in * M_out + m_out], v);
}
}
}
}
torch::Tensor spline_weighting_bw_weight_cuda(torch::Tensor grad_out,
torch::Tensor x,
torch::Tensor basis,
torch::Tensor weight_index,
int64_t kernel_size) {
CHECK_CUDA(grad_out);
CHECK_CUDA(x);
CHECK_CUDA(basis);
CHECK_CUDA(weight_index);
hipSetDevice(grad_out.get_device());
auto E = grad_out.size(0);
auto M_in = x.size(1);
auto M_out = grad_out.size(1);
auto S = basis.size(1);
auto grad_weight = at::zeros({kernel_size, M_in, M_out}, grad_out.options());
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::hip::getCurrentHIPStreamMasqueradingAsCUDA();
AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "weighting_bw_weight", [&] {
auto grad_out_data = grad_out.data_ptr<scalar_t>();
auto x_data = x.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
auto grad_weight_data = grad_weight.data_ptr<scalar_t>();
hipLaunchKernelGGL(( spline_weighting_bw_weight_kernel<scalar_t>)
, dim3(BLOCKS(grad_out.numel())), dim3(THREADS), 0, stream,
grad_out_data, x_data, basis_data, weight_index_data,
grad_weight_data, E, M_in, M_out, S, grad_out.numel());
});
return grad_weight;
}
template <typename scalar_t>
__global__ void spline_weighting_bw_basis_kernel(
const scalar_t *grad_out, const scalar_t *x, const scalar_t *weight,
const int64_t *weight_index, scalar_t *grad_basis, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const size_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_out;
const int64_t m_out = thread_idx % M_out;
if (thread_idx < numel) {
const scalar_t g = grad_out[e * M_out + m_out];
for (int64_t s = 0; s < S; s++) {
scalar_t v = (scalar_t)0.;
const int64_t wi = weight_index[e * S + s];
for (int64_t m_in = 0; m_in < M_in; m_in++) {
const scalar_t w = weight[wi * M_in * M_out + m_in * M_out + m_out];
v += g * w * x[e * M_in + m_in];
}
atomAdd(&grad_basis[e * S + s], v);
}
}
}
torch::Tensor spline_weighting_bw_basis_cuda(torch::Tensor grad_out,
torch::Tensor x,
torch::Tensor weight,
torch::Tensor weight_index) {
CHECK_CUDA(grad_out);
CHECK_CUDA(x);
CHECK_CUDA(weight);
CHECK_CUDA(weight_index);
hipSetDevice(grad_out.get_device());
CHECK_INPUT(x.size(1) == weight.size(1));
CHECK_INPUT(grad_out.size(1) == weight.size(2));
auto E = grad_out.size(0);
auto M_in = x.size(1);
auto M_out = grad_out.size(1);
auto S = weight_index.size(1);
auto grad_basis = at::zeros({E, S}, grad_out.options());
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::hip::getCurrentHIPStreamMasqueradingAsCUDA();
AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "weighting_bw_basis", [&] {
auto grad_out_data = grad_out.data_ptr<scalar_t>();
auto x_data = x.data_ptr<scalar_t>();
auto weight_data = weight.data_ptr<scalar_t>();
auto grad_basis_data = grad_basis.data_ptr<scalar_t>();
hipLaunchKernelGGL(( spline_weighting_bw_basis_kernel<scalar_t>)
, dim3(BLOCKS(grad_out.numel())), dim3(THREADS), 0, stream,
grad_out_data, x_data, weight_data, weight_index_data,
grad_basis_data, E, M_in, M_out, S, grad_out.numel());
});
return grad_basis;
}
#pragma once
#include <torch/extension.h>
int64_t cuda_version();
std::tuple<torch::Tensor, torch::Tensor>
spline_basis(torch::Tensor pseudo, torch::Tensor kernel_size,
torch::Tensor is_open_spline, int64_t degree);
torch::Tensor spline_weighting(torch::Tensor x, torch::Tensor weight,
torch::Tensor basis, torch::Tensor weight_index);
#include <Python.h>
#include <torch/script.h>
#ifdef WITH_HIP
#include <hip/hip_runtime.h>
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__version_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__version_cpu(void) { return NULL; }
#endif
#endif
int64_t cuda_version() {
#ifdef WITH_HIP
return TORCH_HIP_VERSION;
#else
return -1;
#endif
}
static auto registry = torch::RegisterOperators().op(
"torch_spline_conv::cuda_version", &cuda_version);
#include <Python.h>
#include <torch/script.h>
#include "cpu/weighting_cpu.h"
#ifdef WITH_HIP
#include "hip/weighting_hip.h"
#endif
#ifdef _WIN32
#ifdef WITH_HIP
PyMODINIT_FUNC PyInit__weighting_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__weighting_cpu(void) { return NULL; }
#endif
#endif
torch::Tensor spline_weighting_fw(torch::Tensor x, torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index) {
if (x.device().is_cuda()) {
#ifdef WITH_HIP
return spline_weighting_fw_cuda(x, weight, basis, weight_index);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
return spline_weighting_fw_cpu(x, weight, basis, weight_index);
}
}
torch::Tensor spline_weighting_bw_x(torch::Tensor grad_out,
torch::Tensor weight, torch::Tensor basis,
torch::Tensor weight_index) {
if (grad_out.device().is_cuda()) {
#ifdef WITH_HIP
return spline_weighting_bw_x_cuda(grad_out, weight, basis, weight_index);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
return spline_weighting_bw_x_cpu(grad_out, weight, basis, weight_index);
}
}
torch::Tensor spline_weighting_bw_weight(torch::Tensor grad_out,
torch::Tensor x, torch::Tensor basis,
torch::Tensor weight_index,
int64_t kernel_size) {
if (grad_out.device().is_cuda()) {
#ifdef WITH_HIP
return spline_weighting_bw_weight_cuda(grad_out, x, basis, weight_index,
kernel_size);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
return spline_weighting_bw_weight_cpu(grad_out, x, basis, weight_index,
kernel_size);
}
}
torch::Tensor spline_weighting_bw_basis(torch::Tensor grad_out, torch::Tensor x,
torch::Tensor weight,
torch::Tensor weight_index) {
if (grad_out.device().is_cuda()) {
#ifdef WITH_HIP
return spline_weighting_bw_basis_cuda(grad_out, x, weight, weight_index);
#else
AT_ERROR("Not compiled with CUDA support");
#endif
} else {
return spline_weighting_bw_basis_cpu(grad_out, x, weight, weight_index);
}
}
using torch::autograd::AutogradContext;
using torch::autograd::Variable;
using torch::autograd::variable_list;
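// Autograd wrapper: forward() saves (x, weight, basis, weight_index) and
// backward() dispatches to the three gradient functions above; weight_index is
// an integer tensor and receives no gradient.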
class SplineWeighting : public torch::autograd::Function<SplineWeighting> {
public:
static variable_list forward(AutogradContext *ctx, Variable x,
Variable weight, Variable basis,
Variable weight_index) {
auto out = spline_weighting_fw(x, weight, basis, weight_index);
ctx->save_for_backward({x, weight, basis, weight_index});
return {out};
}
static variable_list backward(AutogradContext *ctx, variable_list grad_outs) {
auto grad_out = grad_outs[0];
auto saved = ctx->get_saved_variables();
auto x = saved[0], weight = saved[1], basis = saved[2],
weight_index = saved[3];
auto grad_x = Variable();
if (torch::autograd::any_variable_requires_grad({x})) {
grad_x = spline_weighting_bw_x(grad_out, weight, basis, weight_index);
}
auto grad_weight = Variable();
if (torch::autograd::any_variable_requires_grad({weight})) {
grad_weight = spline_weighting_bw_weight(grad_out, x, basis, weight_index,
weight.size(0));
}
auto grad_basis = Variable();
if (torch::autograd::any_variable_requires_grad({basis})) {
grad_basis = spline_weighting_bw_basis(grad_out, x, weight, weight_index);
}
return {grad_x, grad_weight, grad_basis, Variable()};
}
};
torch::Tensor spline_weighting(torch::Tensor x, torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index) {
x = x.contiguous();
weight = weight.contiguous();
return SplineWeighting::apply(x, weight, basis, weight_index)[0];
}
static auto registry = torch::RegisterOperators().op(
"torch_spline_conv::spline_weighting", &spline_weighting);
#!/bin/bash
source ~/miniconda3/etc/profile.d/conda.sh
conda activate torch1.10_py39_dtk22.10
module purge
module load compiler/devtoolset/7.3.1 mpi/hpcx/gcc-7.3.1 #compiler/dtk/22.10.1
module list
source ~/dtk-22.10.1/env.sh
export C_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/gflags-2.1.2-build/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/gflags-2.1.2-build/include:$CPLUS_INCLUDE_PATH
export C_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/glog-build/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/glog-build/include:$CPLUS_INCLUDE_PATH
export C_INCLUDE_PATH=$ROCM_PATH/rocrand/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=$ROCM_PATH/rocrand/include:$CPLUS_INCLUDE_PATH
export LD_LIBRARY_PATH=$ROCM_PATH/rocrand/lib:$LD_LIBRARY_PATH
export FORCE_ONLY_HIP=1
export CC=hipcc
export CXX=hipcc
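# With this environment sourced, the extension is then typically built with the
# setup.py below, e.g. (hypothetical invocation): python setup.py install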
[metadata]
description-file = README.md
[aliases]
test = pytest
[tool:pytest]
addopts = --cov
[egg_info]
tag_build =
tag_date = 0
import os
import glob
import os.path as osp
from itertools import product
from setuptools import setup, find_packages
import torch
from torch.utils.cpp_extension import BuildExtension
from torch.utils.cpp_extension import CppExtension, CUDAExtension, CUDA_HOME
WITH_HIP = torch.cuda.is_available() and CUDA_HOME is not None
suffices = ['cpu', 'cuda'] if WITH_HIP else ['cpu']
if os.getenv('FORCE_CUDA', '0') == '1':
suffices = ['cuda', 'cpu']
if os.getenv('FORCE_ONLY_HIP', '0') == '1':
suffices = ['hip']
if os.getenv('FORCE_ONLY_CPU', '0') == '1':
suffices = ['cpu']
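# With FORCE_ONLY_HIP=1 only the 'hip' suffix is built: each top-level .cpp in
# csrc is paired with its csrc/hip/{name}_hip.hip counterpart in
# get_extensions() below and compiled as a CUDAExtension.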
ROCM_PATH = os.getenv('ROCM_PATH')
HIPLIB = osp.join(ROCM_PATH, 'hipsparse', 'include')
BUILD_DOCS = os.getenv('BUILD_DOCS', '0') == '1'
def get_extensions():
extensions = []
extensions_dir = osp.join(osp.dirname(osp.abspath(__file__)), 'csrc')
main_files = glob.glob(osp.join(extensions_dir, '*.cpp'))
for main, suffix in product(main_files, suffices):
define_macros = []
extra_compile_args = {'cxx': ['-O2']}
extra_link_args = ['-s']
if suffix == 'hip':
define_macros += [('WITH_HIP', None)]
hipcc_flags = os.getenv('HIPCC_FLAGS', '')
hipcc_flags = [] if hipcc_flags == '' else hipcc_flags.split(' ')
hipcc_flags += ['-arch=sm_35', '--expt-relaxed-constexpr', '-O2']
extra_compile_args['hipcc'] = hipcc_flags
name = main.split(os.sep)[-1][:-4]
sources = [main]
path = osp.join(extensions_dir, 'cpu', f'{name}_cpu.cpp')
if osp.exists(path):
sources += [path]
path = osp.join(extensions_dir, 'hip', f'{name}_hip.hip')
if suffix == 'hip' and osp.exists(path):
sources += [path]
Extension = CppExtension if suffix == 'cpu' else CUDAExtension
define_macros += [('TORCH_HIP_VERSION', 10000), ('__HIP__', None), ('__HCC__', None)]
extension = Extension(
f'torch_spline_conv._{name}_{suffix}',
sources,
include_dirs=[extensions_dir, HIPLIB],
define_macros=define_macros,
extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args,
)
extensions += [extension]
return extensions
install_requires = []
setup_requires = ['pytest-runner']
tests_require = ['pytest', 'pytest-cov']
setup(
name='torch_spline_conv',
version='1.2.1',
author='Matthias Fey',
author_email='matthias.fey@tu-dortmund.de',
url='https://github.com/rusty1s/pytorch_spline_conv',
description=('Implementation of the Spline-Based Convolution Operator of '
'SplineCNN in PyTorch'),
keywords=[
'pytorch',
'geometric-deep-learning',
'graph-neural-networks',
'spline-cnn',
],
license='MIT',
python_requires='>=3.6',
install_requires=install_requires,
setup_requires=setup_requires,
tests_require=tests_require,
ext_modules=get_extensions() if not BUILD_DOCS else [],
cmdclass={
'build_ext': BuildExtension.with_options(no_python_abi_suffix=True)
},
packages=find_packages(),
)
Metadata-Version: 2.1
Name: torch-spline-conv
Version: 1.2.1
Summary: Implementation of the Spline-Based Convolution Operator of SplineCNN in PyTorch
Home-page: https://github.com/rusty1s/pytorch_spline_conv
Author: Matthias Fey
Author-email: matthias.fey@tu-dortmund.de
License: MIT
Keywords: pytorch,geometric-deep-learning,graph-neural-networks,spline-cnn
Requires-Python: >=3.6
License-File: LICENSE
LICENSE
MANIFEST.in
README.md
setup.cfg
setup.py
/work/home/quyuanhao123/software/test_ocp/torch_spline_conv-1.2.1/csrc/basis.cpp
/work/home/quyuanhao123/software/test_ocp/torch_spline_conv-1.2.1/csrc/version.cpp
/work/home/quyuanhao123/software/test_ocp/torch_spline_conv-1.2.1/csrc/weighting.cpp
/work/home/quyuanhao123/software/test_ocp/torch_spline_conv-1.2.1/csrc/cpu/basis_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_spline_conv-1.2.1/csrc/cpu/weighting_cpu.cpp
/work/home/quyuanhao123/software/test_ocp/torch_spline_conv-1.2.1/csrc/hip/basis_hip_hip.hip
/work/home/quyuanhao123/software/test_ocp/torch_spline_conv-1.2.1/csrc/hip/weighting_hip_hip.hip
csrc/basis.cpp
csrc/spline_conv.h
csrc/version.cpp
csrc/weighting.cpp
csrc/cpu/basis_cpu.cpp
csrc/cpu/basis_cpu.h
csrc/cpu/utils.h
csrc/cpu/weighting_cpu.cpp
csrc/cpu/weighting_cpu.h
csrc/hip/atomics.cuh
csrc/hip/basis_hip.h
csrc/hip/basis_hip.hip
csrc/hip/basis_hip_hip.hip
csrc/hip/utils.cuh
csrc/hip/weighting_hip.h
csrc/hip/weighting_hip.hip
csrc/hip/weighting_hip_hip.hip
torch_spline_conv/__init__.py
torch_spline_conv/basis.py
torch_spline_conv/conv.py
torch_spline_conv/weighting.py
torch_spline_conv.egg-info/PKG-INFO
torch_spline_conv.egg-info/SOURCES.txt
torch_spline_conv.egg-info/dependency_links.txt
torch_spline_conv.egg-info/top_level.txt
import importlib
import os.path as osp
import torch
__version__ = '1.2.1'
suffix = 'hip' if torch.cuda.is_available() else 'cpu'
for library in ['_version', '_basis', '_weighting']:
torch.ops.load_library(importlib.machinery.PathFinder().find_spec(
f'{library}_{suffix}', [osp.dirname(__file__)]).origin)
if torch.cuda.is_available(): # pragma: no cover
cuda_version = torch.ops.torch_spline_conv.cuda_version()
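# cuda_version encodes major/minor, e.g. 9020 -> 9.2 and 10020 -> 10.2;
# -1 means the extension was built without GPU support.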
if cuda_version == -1:
major = minor = 0
elif cuda_version < 10000:
major, minor = int(str(cuda_version)[0]), int(str(cuda_version)[2])
else:
major, minor = int(str(cuda_version)[0:2]), int(str(cuda_version)[3])
from .basis import spline_basis # noqa
from .weighting import spline_weighting # noqa
from .conv import spline_conv # noqa
__all__ = [
'spline_basis',
'spline_weighting',
'spline_conv',
'__version__',
]
from typing import Tuple
import torch
@torch.jit.script
def spline_basis(pseudo: torch.Tensor, kernel_size: torch.Tensor,
is_open_spline: torch.Tensor,
degree: int) -> Tuple[torch.Tensor, torch.Tensor]:
return torch.ops.torch_spline_conv.spline_basis(pseudo, kernel_size,
is_open_spline, degree)
from typing import Optional
import torch
from .basis import spline_basis
from .weighting import spline_weighting
@torch.jit.script
def spline_conv(x: torch.Tensor, edge_index: torch.Tensor,
pseudo: torch.Tensor, weight: torch.Tensor,
kernel_size: torch.Tensor, is_open_spline: torch.Tensor,
degree: int = 1, norm: bool = True,
root_weight: Optional[torch.Tensor] = None,
bias: Optional[torch.Tensor] = None) -> torch.Tensor:
r"""Applies the spline-based convolution operator :math:`(f \star g)(i) =
\frac{1}{|\mathcal{N}(i)|} \sum_{l=1}^{M_{in}} \sum_{j \in \mathcal{N}(i)}
f_l(j) \cdot g_l(u(i, j))` over several node features of an input graph.
The kernel function :math:`g_l` is defined over the weighted B-spline
tensor product basis for a single input feature map :math:`l`.
Args:
x (:class:`Tensor`): Input node features of shape
(number_of_nodes x in_channels).
edge_index (:class:`LongTensor`): Graph edges, given by source and
target indices, of shape (2 x number_of_edges).
pseudo (:class:`Tensor`): Edge attributes, i.e. pseudo coordinates,
of shape (number_of_edges x number_of_edge_attributes) in the
fixed interval [0, 1].
weight (:class:`Tensor`): Trainable weight parameters of shape
(kernel_size x in_channels x out_channels).
kernel_size (:class:`LongTensor`): Number of trainable weight
parameters in each edge dimension.
is_open_spline (:class:`ByteTensor`): Whether to use open or closed
B-spline bases for each dimension.
degree (int, optional): B-spline basis degree. (default: :obj:`1`)
norm (bool, optional): Whether to normalize output by node degree.
(default: :obj:`True`)
root_weight (:class:`Tensor`, optional): Additional shared trainable
parameters for each feature of the root node of shape
(in_channels x out_channels). (default: :obj:`None`)
bias (:class:`Tensor`, optional): Optional bias of shape
(out_channels). (default: :obj:`None`)
:rtype: :class:`Tensor`
"""
x = x.unsqueeze(-1) if x.dim() == 1 else x
pseudo = pseudo.unsqueeze(-1) if pseudo.dim() == 1 else pseudo
row, col = edge_index[0], edge_index[1]
N, E, M_out = x.size(0), row.size(0), weight.size(2)
# Weight each node.
basis, weight_index = spline_basis(pseudo, kernel_size, is_open_spline,
degree)
out = spline_weighting(x[col], weight, basis, weight_index)
# Convert E x M_out to N x M_out features.
row_expanded = row.unsqueeze(-1).expand_as(out)
out = x.new_zeros((N, M_out)).scatter_add_(0, row_expanded, out)
# Normalize out by node degree (if wished).
if norm:
ones = torch.ones(E, dtype=x.dtype, device=x.device)
deg = out.new_zeros(N).scatter_add_(0, row, ones)
out = out / deg.unsqueeze(-1).clamp_(min=1)
# Weight root node separately (if wished).
if root_weight is not None:
out = out + torch.matmul(x, root_weight)
# Add bias (if wished).
if bias is not None:
out = out + bias
return out
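# Usage sketch for spline_conv (hypothetical shapes and values; assumes the
# built torch_spline_conv extension is importable):
#
#   import torch
#   from torch_spline_conv import spline_conv
#
#   x = torch.rand(4, 8)                               # 4 nodes, 8 input channels
#   edge_index = torch.tensor([[0, 1, 1, 2, 3],        # source nodes
#                              [1, 0, 2, 3, 1]])       # target nodes
#   pseudo = torch.rand(edge_index.size(1), 2)         # 2-D edge attributes in [0, 1]
#   kernel_size = torch.tensor([5, 5])                 # 5 basis functions per dimension
#   is_open_spline = torch.tensor([1, 1], dtype=torch.uint8)
#   weight = torch.rand(25, 8, 16)                     # 25 = 5 * 5 kernel weights
#
#   out = spline_conv(x, edge_index, pseudo, weight, kernel_size,
#                     is_open_spline, degree=1, norm=True)  # -> shape [4, 16]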