Commit 04d6ec40 authored by limm

push v1.2.1 version

parent 1d2126aa
cmake_minimum_required(VERSION 3.0)
project(torchsplineconv)
set(CMAKE_CXX_STANDARD 14)
set(TORCHSPLINECONV_VERSION 1.2.1)
option(WITH_CUDA "Enable CUDA support" OFF)
if(WITH_CUDA)
enable_language(CUDA)
add_definitions(-D__CUDA_NO_HALF_OPERATORS__)
add_definitions(-DWITH_CUDA)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -arch=sm_35 --expt-relaxed-constexpr")
endif()
find_package(Python3 COMPONENTS Development)
find_package(Torch REQUIRED)
file(GLOB HEADERS csrc/spline_conv.h)
file(GLOB OPERATOR_SOURCES csrc/cpu/*.h csrc/cpu/*.cpp csrc/*.cpp)
if(WITH_CUDA)
file(GLOB OPERATOR_SOURCES ${OPERATOR_SOURCES} csrc/cuda/*.h csrc/cuda/*.cu)
endif()
add_library(${PROJECT_NAME} SHARED ${OPERATOR_SOURCES})
target_link_libraries(${PROJECT_NAME} PRIVATE ${TORCH_LIBRARIES} Python3::Python)
set_target_properties(${PROJECT_NAME} PROPERTIES EXPORT_NAME TorchSplineConv)
target_include_directories(${PROJECT_NAME} INTERFACE
$<BUILD_INTERFACE:${HEADERS}>
$<INSTALL_INTERFACE:${CMAKE_INSTALL_INCLUDEDIR}>)
include(GNUInstallDirs)
include(CMakePackageConfigHelpers)
set(TORCHSPLINECONV_CMAKECONFIG_INSTALL_DIR "share/cmake/TorchSplineConv" CACHE STRING "install path for TorchSplineConvConfig.cmake")
configure_package_config_file(cmake/TorchSplineConvConfig.cmake.in
"${CMAKE_CURRENT_BINARY_DIR}/TorchSplineConvConfig.cmake"
INSTALL_DESTINATION ${TORCHSPLINECONV_CMAKECONFIG_INSTALL_DIR})
write_basic_package_version_file(${CMAKE_CURRENT_BINARY_DIR}/TorchSplineConvConfigVersion.cmake
VERSION ${TORCHSPLINECONV_VERSION}
COMPATIBILITY AnyNewerVersion)
install(FILES ${CMAKE_CURRENT_BINARY_DIR}/TorchSplineConvConfig.cmake
${CMAKE_CURRENT_BINARY_DIR}/TorchSplineConvConfigVersion.cmake
DESTINATION ${TORCHSPLINECONV_CMAKECONFIG_INSTALL_DIR})
install(TARGETS ${PROJECT_NAME}
EXPORT TorchSplineConvTargets
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
)
install(EXPORT TorchSplineConvTargets
NAMESPACE TorchSplineConv::
DESTINATION ${TORCHSPLINECONV_CMAKECONFIG_INSTALL_DIR})
install(FILES ${HEADERS} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME})
install(FILES
csrc/cpu/basis_cpu.h
csrc/cpu/weighting_cpu.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cpu)
if(WITH_CUDA)
install(FILES
csrc/cuda/basis_cuda.h
csrc/cuda/weighting_cuda.h
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/${PROJECT_NAME}/cuda)
endif()
if(WITH_CUDA)
set_property(TARGET torch_cuda PROPERTY INTERFACE_COMPILE_OPTIONS "")
set_property(TARGET torch_cpu PROPERTY INTERFACE_COMPILE_OPTIONS "")
endif()
# <div align="center"><strong>torch-spline-conv-1.2.1</strong></div>
## Introduction
torch-spline-conv is a PyTorch-based package that implements the spline convolution operator used in graph convolutional neural networks. Graph convolutional neural networks are deep learning models that operate on graph-structured data and are suited to tasks such as node classification, graph classification, and graph generation.
[pypi-image]: https://badge.fury.io/py/torch-spline-conv.svg
[pypi-url]: https://pypi.python.org/pypi/torch-spline-conv
[build-image]: https://travis-ci.org/rusty1s/pytorch_spline_conv.svg?branch=master
[build-url]: https://travis-ci.org/rusty1s/pytorch_spline_conv
[coverage-image]: https://codecov.io/gh/rusty1s/pytorch_spline_conv/branch/master/graph/badge.svg
[coverage-url]: https://codecov.io/github/rusty1s/pytorch_spline_conv?branch=master
## Dependency Installation
+ pytorch 1.10 or pytorch 1.13 with the matching torchvision (dtk-22.04.2, dtk-23.04, and dtk-23.10 are recommended)
+ python 3.7-3.10
# Spline-Based Convolution Operator of SplineCNN
### 1. Installing from source
[![PyPI Version][pypi-image]][pypi-url]
[![Build Status][build-image]][build-url]
[![Code Coverage][coverage-image]][coverage-url]
#### Preparing the build environment
Two ways to prepare the environment are provided:
--------------------------------------------------------------------------------
1. Use the SourceFind PyTorch base image. Image download address: [https://sourcefind.cn/#/image/dcu/pytorch](https://sourcefind.cn/#/image/dcu/pytorch); pick the image version that matches your pytorch, python, dtk, and OS versions.
This is a PyTorch implementation of the spline-based convolution operator of SplineCNN, as described in our paper:
Matthias Fey, Jan Eric Lenssen, Frank Weichert, Heinrich Müller: [SplineCNN: Fast Geometric Deep Learning with Continuous B-Spline Kernels](https://arxiv.org/abs/1711.08920) (CVPR 2018)
The operator works on all floating point data types and is implemented both for CPU and GPU.
## Installation
### Binaries
We provide pip wheels for all major OS/PyTorch/CUDA combinations, see [here](https://pytorch-geometric.com/whl).
#### PyTorch 1.7.0
To install the binaries for PyTorch 1.7.0, simply run
```
pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.7.0+${CUDA}.html
```
where `${CUDA}` should be replaced by either `cpu`, `cu92`, `cu101`, `cu102`, or `cu110` depending on your PyTorch installation.
| | `cpu` | `cu92` | `cu101` | `cu102` | `cu110` |
|-------------|-------|--------|---------|---------|---------|
| **Linux** | ✅ | ✅ | ✅ | ✅ | ✅ |
| **Windows** | ✅ | ❌ | ✅ | ✅ | ✅ |
| **macOS** | ✅ | | | | |
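For example, a CPU-only install for PyTorch 1.7.0 resolves to:
```
pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.7.0+cpu.html
```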
#### PyTorch 1.6.0
To install the binaries for PyTorch 1.6.0, simply run
```
pip install torch-spline-conv -f https://pytorch-geometric.com/whl/torch-1.6.0+${CUDA}.html
```
where `${CUDA}` should be replaced by either `cpu`, `cu92`, `cu101`, or `cu102` depending on your PyTorch installation.
| | `cpu` | `cu92` | `cu101` | `cu102` |
|-------------|-------|--------|---------|---------|
| **Linux** | ✅ | ✅ | ✅ | ✅ |
| **Windows** | ✅ | ❌ | ✅ | ✅ |
| **macOS** | ✅ | | | |
**Note:** Binaries of older versions are also provided for PyTorch 1.4.0 and PyTorch 1.5.0 (following the same procedure).
### From source
Ensure that at least PyTorch 1.4.0 is installed and verify that `cuda/bin` and `cuda/include` are in your `$PATH` and `$CPATH` respectively, *e.g.*:
```
$ python -c "import torch; print(torch.__version__)"
>>> 1.4.0
$ echo $PATH
>>> /usr/local/cuda/bin:...
$ echo $CPATH
>>> /usr/local/cuda/include:...
```
2. Use an existing Python environment: install pytorch and torchvision from the whl package directories [https://cancon.hpccube.com:65024/4/main/pytorch](https://cancon.hpccube.com:65024/4/main/pytorch) and [https://cancon.hpccube.com:65024/4/main/vision](https://cancon.hpccube.com:65024/4/main/vision), choosing the whl packages that match your python and dtk versions. Install them with:
```shell
pip install torch*        # the downloaded torch whl package
pip install torchvision*  # the downloaded torchvision whl package
pip install setuptools==59.5.0 wheel
```
Then run:
```
pip install torch-spline-conv
```
When running in a docker container without an NVIDIA driver, PyTorch needs to evaluate the compute capabilities and may fail.
In this case, ensure that the compute capabilities are set via `TORCH_CUDA_ARCH_LIST`, *e.g.*:
```
export TORCH_CUDA_ARCH_LIST="6.0 6.1 7.2+PTX 7.5+PTX"
```
#### Building and installing from source
- Download the code:
```shell
git clone http://developer.hpccube.com/codes/aicomponent/torch-spline-conv # switch branches as needed for your build
```
- Build from source (inside the torch-spline-conv directory):
```
export C_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/gflags-2.1.2-build/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/gflags-2.1.2-build/include:$CPLUS_INCLUDE_PATH
export C_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/glog-build/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=/public/software/apps/DeepLearning/PyTorch_Lib/glog-build/include:$CPLUS_INCLUDE_PATH
export C_INCLUDE_PATH=$ROCM_PATH/rocrand/include:$C_INCLUDE_PATH
export CPLUS_INCLUDE_PATH=$ROCM_PATH/rocrand/include:$CPLUS_INCLUDE_PATH
export LD_LIBRARY_PATH=$ROCM_PATH/rocrand/lib:$LD_LIBRARY_PATH
export FORCE_ONLY_HIP=1
export CC=hipcc
export CXX=hipcc
python setup.py install
```
## Usage
```python
from torch_spline_conv import spline_conv
out = spline_conv(x,
                  edge_index,
                  pseudo,
                  weight,
                  kernel_size,
                  is_open_spline,
                  degree=1,
                  norm=True,
                  root_weight=None,
                  bias=None)
```
#### Notes
+ If downloading with pip install is slow, add the Tsinghua PyPI mirror: `-i https://pypi.tuna.tsinghua.edu.cn/simple/`, as in the example below.
+ `ROCM_PATH` is the dtk installation path, `/opt/dtk` by default.
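For example, installing the prebuilt package through the Tsinghua mirror:
```shell
pip install torch-spline-conv -i https://pypi.tuna.tsinghua.edu.cn/simple/
```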
## Verification
Applies the spline-based convolution operator
<p align="center">
<img width="50%" src="https://user-images.githubusercontent.com/6945922/38684093-36d9c52e-3e6f-11e8-9021-db054223c6b9.png" />
</p>
over several node features of an input graph.
The kernel function is defined over the weighted B-spline tensor product basis, as shown below for different B-spline degrees.
<p align="center">
<img width="45%" src="https://user-images.githubusercontent.com/6945922/38685443-3a2a0c68-3e72-11e8-8e13-9ce9ad8fe43e.png" />
<img width="45%" src="https://user-images.githubusercontent.com/6945922/38685459-42b2bcae-3e72-11e8-88cc-4b61e41dbd93.png" />
</p>
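In symbols, the kernel function can be sketched as a weighted sum over the B-spline tensor product basis (notation paraphrased from the SplineCNN paper):
```latex
% w_p: trainable weights; N^m_{i,p_i}: degree-m B-spline basis functions
% evaluated on the i-th pseudo-coordinate u_i.
g(\mathbf{u}) = \sum_{\mathbf{p} \in \mathcal{P}} w_{\mathbf{p}} \prod_{i=1}^{d} N^{m}_{i,p_i}(u_i)
```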
### Parameters
* **x** *(Tensor)* - Input node features of shape `(number_of_nodes x in_channels)`.
* **edge_index** *(LongTensor)* - Graph edges, given by source and target indices, of shape `(2 x number_of_edges)`.
* **pseudo** *(Tensor)* - Edge attributes, *i.e.* pseudo-coordinates, of shape `(number_of_edges x number_of_edge_attributes)` in the fixed interval [0, 1].
* **weight** *(Tensor)* - Trainable weight parameters of shape `(kernel_size x in_channels x out_channels)`.
* **kernel_size** *(LongTensor)* - Number of trainable weight parameters in each edge dimension.
* **is_open_spline** *(ByteTensor)* - Whether to use open or closed B-spline bases for each dimension.
* **degree** *(int, optional)* - B-spline basis degree. (default: `1`)
* **norm** *(bool, optional)* - Whether to normalize output by node degree. (default: `True`)
* **root_weight** *(Tensor, optional)* - Additional shared trainable parameters for each feature of the root node of shape `(in_channels x out_channels)`. (default: `None`)
* **bias** *(Tensor, optional)* - Optional bias of shape `(out_channels)`. (default: `None`)
### Returns
* **out** *(Tensor)* - Output node features of shape `(number_of_nodes x out_channels)`.
### Example
```python
import torch
from torch_spline_conv import spline_conv

x = torch.rand((4, 2), dtype=torch.float)  # 4 nodes with 2 features each
edge_index = torch.tensor([[0, 1, 1, 2, 2, 3],
                           [1, 0, 2, 1, 3, 2]])  # 6 edges
pseudo = torch.rand((6, 2), dtype=torch.float)  # two-dimensional edge attributes
weight = torch.rand((25, 2, 4), dtype=torch.float)  # 25 parameters for in_channels x out_channels
kernel_size = torch.tensor([5, 5])  # 5 parameters in each edge dimension
is_open_spline = torch.tensor([1, 1], dtype=torch.uint8)  # only use open B-splines
degree = 1  # B-spline degree of 1
norm = True  # normalize output by node degree
root_weight = torch.rand((2, 4), dtype=torch.float)  # separately weight root nodes
bias = None  # do not apply an additional bias

out = spline_conv(x, edge_index, pseudo, weight, kernel_size,
                  is_open_spline, degree, norm, root_weight, bias)

print(out.size())
torch.Size([4, 4]) # 4 nodes with 4 features each
```
## Known Issues
- This library has not been adapted for CPU-only environments and supports DCU only; run it in an environment with a DCU card.
- To use the full set of PyG features, run `pip install torch-geometric`.
## Cite
Please cite our paper if you use this code in your own work:
```
@inproceedings{Fey/etal/2018,
  title={{SplineCNN}: Fast Geometric Deep Learning with Continuous {B}-Spline Kernels},
  author={Fey, Matthias and Lenssen, Jan Eric and Weichert, Frank and M{\"u}ller, Heinrich},
  booktitle={IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
  year={2018},
}
```
## References
- [README_ORIGIN](README_ORIGIN.md)
- [https://pypi.org/project/torch-spline-conv/1.2.1/](https://pypi.org/project/torch-spline-conv/1.2.1/)
## Running tests
```
python setup.py test
```
## C++ API
`torch-spline-conv` also offers a C++ API that contains C++ equivalents of the Python models.
```
mkdir build
cd build
# Add -DWITH_CUDA=on to enable CUDA support if needed
cmake ..
make
make install
```
# TorchSplineConvConfig.cmake
# --------------------
#
# Exported targets:: TorchSplineConv
#
@PACKAGE_INIT@
set(PN TorchSplineConv)
set(${PN}_INCLUDE_DIR "${PACKAGE_PREFIX_DIR}/@CMAKE_INSTALL_INCLUDEDIR@")
set(${PN}_LIBRARY "")
set(${PN}_DEFINITIONS USING_${PN})
check_required_components(${PN})
if(NOT (CMAKE_VERSION VERSION_LESS 3.0))
#-----------------------------------------------------------------------------
# Don't include targets if this file is being picked up by another
# project which has already built this as a subproject
#-----------------------------------------------------------------------------
if(NOT TARGET ${PN}::TorchSplineConv)
include("${CMAKE_CURRENT_LIST_DIR}/${PN}Targets.cmake")
if(NOT TARGET torch_library)
find_package(Torch REQUIRED)
endif()
if(NOT TARGET Python3::Python)
find_package(Python3 COMPONENTS Development)
endif()
target_link_libraries(TorchSplineConv::TorchSplineConv INTERFACE ${TORCH_LIBRARIES} Python3::Python)
if(@WITH_CUDA@)
target_compile_definitions(TorchSplineConv::TorchSplineConv INTERFACE WITH_CUDA)
endif()
endif()
endif()
......@@ -3,12 +3,12 @@
#include "cpu/basis_cpu.h"
#ifdef WITH_HIP
#include "hip/basis_hip.h"
#ifdef WITH_CUDA
#include "cuda/basis_cuda.h"
#endif
#ifdef _WIN32
#ifdef WITH_HIP
#ifdef WITH_CUDA
PyMODINIT_FUNC PyInit__basis_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__basis_cpu(void) { return NULL; }
......@@ -19,7 +19,7 @@ std::tuple<torch::Tensor, torch::Tensor>
spline_basis_fw(torch::Tensor pseudo, torch::Tensor kernel_size,
torch::Tensor is_open_spline, int64_t degree) {
if (pseudo.device().is_cuda()) {
#ifdef WITH_HIP
#ifdef WITH_CUDA
return spline_basis_fw_cuda(pseudo, kernel_size, is_open_spline, degree);
#else
AT_ERROR("Not compiled with CUDA support");
......@@ -33,7 +33,7 @@ torch::Tensor spline_basis_bw(torch::Tensor grad_basis, torch::Tensor pseudo,
torch::Tensor kernel_size,
torch::Tensor is_open_spline, int64_t degree) {
if (grad_basis.device().is_cuda()) {
#ifdef WITH_HIP
#ifdef WITH_CUDA
return spline_basis_bw_cuda(grad_basis, pseudo, kernel_size, is_open_spline,
degree);
#else
......
#pragma once
static inline __device__ void atomAdd(float *address, float val) {
atomicAdd(address, val);
}
#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600 || CUDA_VERSION < 8000)
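// Native double-precision atomicAdd is unavailable before sm_60 / CUDA 8.0,
// so emulate it with an atomicCAS loop on the value's 64-bit representation.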
static inline __device__ void atomAdd(double *address, double val) {
unsigned long long int *address_as_ull = (unsigned long long int *)address;
unsigned long long int old = *address_as_ull;
unsigned long long int assumed;
do {
assumed = old;
old = atomicCAS(address_as_ull, assumed,
__double_as_longlong(val + __longlong_as_double(assumed)));
} while (assumed != old);
}
#else
static inline __device__ void atomAdd(double *address, double val) {
atomicAdd(address, val);
}
#endif
#include "basis_cuda.h"
#include <ATen/cuda/CUDAContext.h>
#include "utils.cuh"
#define THREADS 1024
#define BLOCKS(N) (((N) + THREADS - 1) / THREADS)
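// Evaluates the local B-spline basis functions (and their derivatives) of
// degree 1-3 at v; k_mod selects one of the (degree + 1) local functions.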
template <typename scalar_t, int64_t degree> struct Basis {
static inline __device__ scalar_t forward(scalar_t v, int64_t k_mod) {
if (degree == 1) {
return 1. - v - k_mod + 2. * v * k_mod;
} else if (degree == 2) {
if (k_mod == 0)
return 0.5 * v * v - v + 0.5;
else if (k_mod == 1)
return -v * v + v + 0.5;
else
return 0.5 * v * v;
} else if (degree == 3) {
if (k_mod == 0)
return (1. - v) * (1. - v) * (1. - v) / 6.;
else if (k_mod == 1)
return (3. * v * v * v - 6. * v * v + 4.) / 6.;
else if (k_mod == 2)
return (-3. * v * v * v + 3. * v * v + 3. * v + 1.) / 6.;
else
return v * v * v / 6.;
} else {
return (scalar_t)-1.;
}
}
static inline __device__ scalar_t backward(scalar_t v, int64_t k_mod) {
if (degree == 1) {
return 2 * k_mod - 1;
} else if (degree == 2) {
if (k_mod == 0)
return v - 1.;
else if (k_mod == 1)
return -2. * v + 1.;
else
return v;
} else if (degree == 3) {
if (k_mod == 0)
return (-v * v + 2. * v - 1.) / 2.;
else if (k_mod == 1)
return (3. * v * v - 4. * v) / 2.;
else if (k_mod == 2)
return (-3. * v * v + 2. * v + 1.) / 2.;
else
return v * v / 2.;
} else {
return (scalar_t)-1.;
}
}
};
template <typename scalar_t, int64_t degree>
__global__ void
spline_basis_fw_kernel(const scalar_t *pseudo, const int64_t *kernel_size,
const uint8_t *is_open_spline, scalar_t *basis,
int64_t *weight_index, int64_t E, int64_t D, int64_t S,
int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / S;
const int64_t s = thread_idx % S;
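// One thread per (edge e, tensor-product term s): accumulate the basis
// value b and the flattened weight index wi across all D pseudo dimensions.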
if (thread_idx < numel) {
int64_t k = s, wi = 0, wi_offset = 1;
scalar_t b = (scalar_t)1.;
for (int64_t d = 0; d < D; d++) {
const int64_t k_mod = k % (degree + 1);
k /= degree + 1;
scalar_t v = pseudo[e * D + d];
v *= kernel_size[d] - degree * is_open_spline[d];
wi += (((int64_t)v + k_mod) % kernel_size[d]) * wi_offset;
wi_offset *= kernel_size[d];
v -= floor(v);
v = Basis<scalar_t, degree>::forward(v, k_mod);
b *= v;
}
basis[thread_idx] = b;
weight_index[thread_idx] = wi;
}
}
std::tuple<torch::Tensor, torch::Tensor>
spline_basis_fw_cuda(torch::Tensor pseudo, torch::Tensor kernel_size,
torch::Tensor is_open_spline, int64_t degree) {
CHECK_CUDA(pseudo);
CHECK_CUDA(kernel_size);
CHECK_CUDA(is_open_spline);
cudaSetDevice(pseudo.get_device());
CHECK_INPUT(kernel_size.dim() == 1);
CHECK_INPUT(pseudo.size(1) == kernel_size.numel());
CHECK_INPUT(is_open_spline.dim() == 1);
CHECK_INPUT(pseudo.size(1) == is_open_spline.numel());
auto E = pseudo.size(0);
auto D = pseudo.size(1);
auto S = (int64_t)(powf(degree + 1, D) + 0.5);
auto basis = at::empty({E, S}, pseudo.options());
auto weight_index = at::empty({E, S}, kernel_size.options());
auto kernel_size_data = kernel_size.data_ptr<int64_t>();
auto is_open_spline_data = is_open_spline.data_ptr<uint8_t>();
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(pseudo.scalar_type(), "basis_fw", [&] {
auto pseudo_data = pseudo.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
AT_DISPATCH_DEGREE_TYPES(degree, [&] {
spline_basis_fw_kernel<scalar_t, DEGREE>
<<<BLOCKS(basis.numel()), THREADS, 0, stream>>>(
pseudo_data, kernel_size_data, is_open_spline_data, basis_data,
weight_index_data, E, D, S, basis.numel());
});
});
return std::make_tuple(basis, weight_index);
}
template <typename scalar_t, int64_t degree>
__global__ void
spline_basis_bw_kernel(const scalar_t *grad_basis, const scalar_t *pseudo,
const int64_t *kernel_size,
const uint8_t *is_open_spline, scalar_t *grad_pseudo,
int64_t E, int64_t D, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / D;
const int64_t d = thread_idx % D;
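// One thread per (edge e, dimension d): differentiate the tensor product
// w.r.t. dimension d and multiply by the forward basis values of the rest.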
if (thread_idx < numel) {
scalar_t g = (scalar_t)0., tmp;
for (ptrdiff_t s = 0; s < S; s++) {
int64_t k_mod = (s / (int64_t)(powf(degree + 1, d) + 0.5)) % (degree + 1);
scalar_t v = pseudo[e * D + d];
v *= kernel_size[d] - degree * is_open_spline[d];
v -= floor(v);
v = Basis<scalar_t, degree>::backward(v, k_mod);
tmp = v;
for (int64_t d_it = 1; d_it < D; d_it++) {
const int64_t d_new = d_it - (d >= d_it);
k_mod = (s / (int64_t)(powf(degree + 1, d_new) + 0.5)) % (degree + 1);
v = pseudo[e * D + d_new];
v *= kernel_size[d_new] - degree * is_open_spline[d_new];
v -= floor(v);
v = Basis<scalar_t, degree>::forward(v, k_mod);
tmp *= v;
}
g += tmp * grad_basis[e * S + s];
}
g *= kernel_size[d] - degree * is_open_spline[d];
grad_pseudo[thread_idx] = g;
}
}
torch::Tensor spline_basis_bw_cuda(torch::Tensor grad_basis,
torch::Tensor pseudo,
torch::Tensor kernel_size,
torch::Tensor is_open_spline,
int64_t degree) {
CHECK_CUDA(grad_basis);
CHECK_CUDA(pseudo);
CHECK_CUDA(kernel_size);
CHECK_CUDA(is_open_spline);
cudaSetDevice(grad_basis.get_device());
CHECK_INPUT(grad_basis.size(0) == pseudo.size(0));
CHECK_INPUT(kernel_size.dim() == 1);
CHECK_INPUT(pseudo.size(1) == kernel_size.numel());
CHECK_INPUT(is_open_spline.dim() == 1);
CHECK_INPUT(pseudo.size(1) == is_open_spline.numel());
auto E = pseudo.size(0);
auto D = pseudo.size(1);
auto S = grad_basis.size(1);
auto grad_pseudo = at::empty({E, D}, pseudo.options());
auto kernel_size_data = kernel_size.data_ptr<int64_t>();
auto is_open_spline_data = is_open_spline.data_ptr<uint8_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(pseudo.scalar_type(), "basis_bw", [&] {
auto grad_basis_data = grad_basis.data_ptr<scalar_t>();
auto pseudo_data = pseudo.data_ptr<scalar_t>();
auto grad_pseudo_data = grad_pseudo.data_ptr<scalar_t>();
AT_DISPATCH_DEGREE_TYPES(degree, [&] {
spline_basis_bw_kernel<scalar_t, DEGREE>
<<<BLOCKS(grad_pseudo.numel()), THREADS, 0, stream>>>(
grad_basis_data, pseudo_data, kernel_size_data,
is_open_spline_data, grad_pseudo_data, E, D, S,
grad_pseudo.numel());
});
});
return grad_pseudo;
}
#pragma once
#include <torch/extension.h>
std::tuple<torch::Tensor, torch::Tensor>
spline_basis_fw_cuda(torch::Tensor pseudo, torch::Tensor kernel_size,
torch::Tensor is_open_spline, int64_t degree);
torch::Tensor spline_basis_bw_cuda(torch::Tensor grad_basis,
torch::Tensor pseudo,
torch::Tensor kernel_size,
torch::Tensor is_open_spline,
int64_t degree);
#pragma once
#include <torch/extension.h>
#define CHECK_CUDA(x) \
AT_ASSERTM(x.device().is_cuda(), #x " must be CUDA tensor")
#define CHECK_INPUT(x) AT_ASSERTM(x, "Input mismatch")
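// Dispatch the runtime `degree` to a compile-time constant DEGREE (1-3),
// mirroring the style of ATen's AT_DISPATCH_* macros.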
#define AT_DISPATCH_DEGREE_TYPES(degree, ...) \
[&] { \
switch (degree) { \
case 1: { \
const int64_t DEGREE = 1; \
return __VA_ARGS__(); \
} \
case 2: { \
const int64_t DEGREE = 2; \
return __VA_ARGS__(); \
} \
case 3: { \
const int64_t DEGREE = 3; \
return __VA_ARGS__(); \
} \
default: \
AT_ERROR("Basis degree not implemented"); \
} \
}()
#include "weighting_cuda.h"
#include <ATen/cuda/CUDAContext.h>
#include "atomics.cuh"
#include "utils.cuh"
#define THREADS 1024
#define BLOCKS(N) (((N) + THREADS - 1) / THREADS)
template <typename scalar_t>
__global__ void
spline_weighting_fw_kernel(const scalar_t *x, const scalar_t *weight,
const scalar_t *basis, const int64_t *weight_index,
scalar_t *out, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_out;
const int64_t m_out = thread_idx % M_out;
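// One thread per (edge e, output channel m_out): sum the spline-weighted
// contributions of all S basis terms over the M_in input channels.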
if (thread_idx < numel) {
scalar_t v = (scalar_t)0.;
for (ptrdiff_t s = 0; s < S; s++) {
const scalar_t b = basis[e * S + s];
const int64_t wi = weight_index[e * S + s];
for (int64_t m_in = 0; m_in < M_in; m_in++) {
scalar_t tmp = weight[wi * M_in * M_out + m_in * M_out + m_out];
tmp *= b * x[e * M_in + m_in];
v += tmp;
}
}
out[thread_idx] = v;
}
}
torch::Tensor spline_weighting_fw_cuda(torch::Tensor x, torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index) {
CHECK_CUDA(x);
CHECK_CUDA(weight);
CHECK_CUDA(basis);
CHECK_CUDA(weight_index);
cudaSetDevice(x.get_device());
CHECK_INPUT(x.size(1) == weight.size(1));
auto E = x.size(0);
auto M_in = x.size(1);
auto M_out = weight.size(2);
auto S = basis.size(1);
auto out = at::empty({E, M_out}, x.options());
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "weighting_fw", [&] {
auto x_data = x.data_ptr<scalar_t>();
auto weight_data = weight.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
auto out_data = out.data_ptr<scalar_t>();
spline_weighting_fw_kernel<scalar_t>
<<<BLOCKS(out.numel()), THREADS, 0, stream>>>(
x_data, weight_data, basis_data, weight_index_data, out_data, E,
M_in, M_out, S, out.numel());
});
return out;
}
template <typename scalar_t>
__global__ void
spline_weighting_bw_x_kernel(const scalar_t *grad_out, const scalar_t *weight,
const scalar_t *basis, const int64_t *weight_index,
scalar_t *grad_x, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_in;
const int64_t m_in = thread_idx % M_in;
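// One thread per (edge e, input channel m_in), reading the weight tensor
// transposed to (wi, m_out, m_in) by the host for contiguous access.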
if (thread_idx < numel) {
scalar_t v = (scalar_t)0.;
for (int64_t s = 0; s < S; s++) {
const scalar_t b = basis[e * S + s];
const int64_t wi = weight_index[e * S + s];
for (int64_t m_out = 0; m_out < M_out; m_out++) {
scalar_t tmp = weight[wi * M_out * M_in + m_out * M_in + m_in];
tmp *= b * grad_out[e * M_out + m_out];
v += tmp;
}
}
grad_x[thread_idx] = v;
}
}
torch::Tensor spline_weighting_bw_x_cuda(torch::Tensor grad_out,
torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index) {
CHECK_CUDA(grad_out);
CHECK_CUDA(weight);
CHECK_CUDA(basis);
CHECK_CUDA(weight_index);
cudaSetDevice(grad_out.get_device());
CHECK_INPUT(grad_out.size(1) == weight.size(2));
auto E = grad_out.size(0);
auto M_in = weight.size(1);
auto M_out = grad_out.size(1);
auto S = basis.size(1);
auto grad_x = at::zeros({E, M_in}, grad_out.options());
weight = weight.transpose(1, 2).contiguous(); // Contiguous memory-access.
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(grad_out.scalar_type(), "weighting_bw_x", [&] {
auto grad_out_data = grad_out.data_ptr<scalar_t>();
auto weight_data = weight.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
auto grad_x_data = grad_x.data_ptr<scalar_t>();
spline_weighting_bw_x_kernel<scalar_t>
<<<BLOCKS(grad_x.numel()), THREADS, 0, stream>>>(
grad_out_data, weight_data, basis_data, weight_index_data,
grad_x_data, E, M_in, M_out, S, grad_x.numel());
});
return grad_x;
}
template <typename scalar_t>
__global__ void spline_weighting_bw_weight_kernel(
const scalar_t *grad_out, const scalar_t *x, const scalar_t *basis,
const int64_t *weight_index, scalar_t *grad_weight, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_out;
const int64_t m_out = thread_idx % M_out;
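// Threads iterate over (edge e, output channel m_out); different edges can
// share a weight index wi, so gradients are scattered with atomic adds.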
if (thread_idx < numel) {
auto g = grad_out[e * M_out + m_out];
for (int64_t s = 0; s < S; s++) {
const scalar_t b = basis[e * S + s];
const int64_t wi = weight_index[e * S + s];
for (int64_t m_in = 0; m_in < M_in; m_in++) {
auto v = g * b * x[e * M_in + m_in];
atomAdd(&grad_weight[wi * M_in * M_out + m_in * M_out + m_out], v);
}
}
}
}
torch::Tensor spline_weighting_bw_weight_cuda(torch::Tensor grad_out,
torch::Tensor x,
torch::Tensor basis,
torch::Tensor weight_index,
int64_t kernel_size) {
CHECK_CUDA(grad_out);
CHECK_CUDA(x);
CHECK_CUDA(basis);
CHECK_CUDA(weight_index);
cudaSetDevice(grad_out.get_device());
auto E = grad_out.size(0);
auto M_in = x.size(1);
auto M_out = grad_out.size(1);
auto S = basis.size(1);
auto grad_weight = at::zeros({kernel_size, M_in, M_out}, grad_out.options());
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "weighting_bw_weight", [&] {
auto grad_out_data = grad_out.data_ptr<scalar_t>();
auto x_data = x.data_ptr<scalar_t>();
auto basis_data = basis.data_ptr<scalar_t>();
auto grad_weight_data = grad_weight.data_ptr<scalar_t>();
spline_weighting_bw_weight_kernel<scalar_t>
<<<BLOCKS(grad_out.numel()), THREADS, 0, stream>>>(
grad_out_data, x_data, basis_data, weight_index_data,
grad_weight_data, E, M_in, M_out, S, grad_out.numel());
});
return grad_weight;
}
template <typename scalar_t>
__global__ void spline_weighting_bw_basis_kernel(
const scalar_t *grad_out, const scalar_t *x, const scalar_t *weight,
const int64_t *weight_index, scalar_t *grad_basis, int64_t E, int64_t M_in,
int64_t M_out, int64_t S, int64_t numel) {
const int64_t thread_idx = blockIdx.x * blockDim.x + threadIdx.x;
const int64_t e = thread_idx / M_out;
const int64_t m_out = thread_idx % M_out;
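// One thread per (edge e, output channel m_out); partial products over the
// input channels are scatter-added into grad_basis[e, s] atomically.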
if (thread_idx < numel) {
const scalar_t g = grad_out[e * M_out + m_out];
for (int64_t s = 0; s < S; s++) {
scalar_t v = (scalar_t)0.;
const int64_t wi = weight_index[e * S + s];
for (int64_t m_in = 0; m_in < M_in; m_in++) {
const scalar_t w = weight[wi * M_in * M_out + m_in * M_out + m_out];
v += g * w * x[e * M_in + m_in];
}
atomAdd(&grad_basis[e * S + s], v);
}
}
}
torch::Tensor spline_weighting_bw_basis_cuda(torch::Tensor grad_out,
torch::Tensor x,
torch::Tensor weight,
torch::Tensor weight_index) {
CHECK_CUDA(grad_out);
CHECK_CUDA(x);
CHECK_CUDA(weight);
CHECK_CUDA(weight_index);
cudaSetDevice(grad_out.get_device());
CHECK_INPUT(x.size(1) == weight.size(1));
CHECK_INPUT(grad_out.size(1) == weight.size(2));
auto E = grad_out.size(0);
auto M_in = x.size(1);
auto M_out = grad_out.size(1);
auto S = weight_index.size(1);
auto grad_basis = at::zeros({E, S}, grad_out.options());
auto weight_index_data = weight_index.data_ptr<int64_t>();
auto stream = at::cuda::getCurrentCUDAStream();
AT_DISPATCH_FLOATING_TYPES(x.scalar_type(), "weighting_bw_basis", [&] {
auto grad_out_data = grad_out.data_ptr<scalar_t>();
auto x_data = x.data_ptr<scalar_t>();
auto weight_data = weight.data_ptr<scalar_t>();
auto grad_basis_data = grad_basis.data_ptr<scalar_t>();
spline_weighting_bw_basis_kernel<scalar_t>
<<<BLOCKS(grad_out.numel()), THREADS, 0, stream>>>(
grad_out_data, x_data, weight_data, weight_index_data,
grad_basis_data, E, M_in, M_out, S, grad_out.numel());
});
return grad_basis;
}
#pragma once
#include <torch/extension.h>
torch::Tensor spline_weighting_fw_cuda(torch::Tensor x, torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index);
torch::Tensor spline_weighting_bw_x_cuda(torch::Tensor grad_out,
torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index);
torch::Tensor spline_weighting_bw_weight_cuda(torch::Tensor grad_out,
torch::Tensor x,
torch::Tensor basis,
torch::Tensor weight_index,
int64_t kernel_size);
torch::Tensor spline_weighting_bw_basis_cuda(torch::Tensor grad_out,
torch::Tensor x,
torch::Tensor weight,
torch::Tensor weight_index);
#include <Python.h>
#include <torch/script.h>
#ifdef WITH_HIP
#include <hip/hip_runtime.h>
#ifdef WITH_CUDA
#include <cuda.h>
#endif
#ifdef _WIN32
#ifdef WITH_HIP
#ifdef WITH_CUDA
PyMODINIT_FUNC PyInit__version_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__version_cpu(void) { return NULL; }
......@@ -14,8 +14,8 @@ PyMODINIT_FUNC PyInit__version_cpu(void) { return NULL; }
#endif
int64_t cuda_version() {
#ifdef WITH_HIP
return TORCH_HIP_VERSION;
#ifdef WITH_CUDA
return CUDA_VERSION;
#else
return -1;
#endif
......
......@@ -3,12 +3,12 @@
#include "cpu/weighting_cpu.h"
#ifdef WITH_HIP
#include "hip/weighting_hip.h"
#ifdef WITH_CUDA
#include "cuda/weighting_cuda.h"
#endif
#ifdef _WIN32
#ifdef WITH_HIP
#ifdef WITH_CUDA
PyMODINIT_FUNC PyInit__weighting_cuda(void) { return NULL; }
#else
PyMODINIT_FUNC PyInit__weighting_cpu(void) { return NULL; }
......@@ -19,7 +19,7 @@ torch::Tensor spline_weighting_fw(torch::Tensor x, torch::Tensor weight,
torch::Tensor basis,
torch::Tensor weight_index) {
if (x.device().is_cuda()) {
#ifdef WITH_HIP
#ifdef WITH_CUDA
return spline_weighting_fw_cuda(x, weight, basis, weight_index);
#else
AT_ERROR("Not compiled with CUDA support");
......@@ -33,7 +33,7 @@ torch::Tensor spline_weighting_bw_x(torch::Tensor grad_out,
torch::Tensor weight, torch::Tensor basis,
torch::Tensor weight_index) {
if (grad_out.device().is_cuda()) {
#ifdef WITH_HIP
#ifdef WITH_CUDA
return spline_weighting_bw_x_cuda(grad_out, weight, basis, weight_index);
#else
AT_ERROR("Not compiled with CUDA support");
......@@ -48,7 +48,7 @@ torch::Tensor spline_weighting_bw_weight(torch::Tensor grad_out,
torch::Tensor weight_index,
int64_t kernel_size) {
if (grad_out.device().is_cuda()) {
#ifdef WITH_HIP
#ifdef WITH_CUDA
return spline_weighting_bw_weight_cuda(grad_out, x, basis, weight_index,
kernel_size);
#else
......@@ -64,7 +64,7 @@ torch::Tensor spline_weighting_bw_basis(torch::Tensor grad_out, torch::Tensor x,
torch::Tensor weight,
torch::Tensor weight_index) {
if (grad_out.device().is_cuda()) {
#ifdef WITH_HIP
#ifdef WITH_CUDA
return spline_weighting_bw_basis_cuda(grad_out, x, weight, weight_index);
#else
AT_ERROR("Not compiled with CUDA support");
......
#!/bin/bash
if [ "${TRAVIS_OS_NAME}" = "linux" ]; then
wget -nv https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh
chmod +x miniconda.sh
./miniconda.sh -b
PATH=/home/travis/miniconda3/bin:${PATH}
fi
if [ "${TRAVIS_OS_NAME}" = "osx" ]; then
wget -nv https://repo.anaconda.com/miniconda/Miniconda3-latest-MacOSX-x86_64.sh -O miniconda.sh
chmod +x miniconda.sh
./miniconda.sh -b
PATH=/Users/travis/miniconda3/bin:${PATH}
fi
if [ "${TRAVIS_OS_NAME}" = "windows" ]; then
choco install openssl.light
choco install miniconda3
PATH=/c/tools/miniconda3/Scripts:$PATH
fi
conda update --yes conda
conda create --yes -n test python="${PYTHON_VERSION}"
#!/bin/bash
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "$IDX" = "cpu" ]; then
export TOOLKIT=cpuonly
fi
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "$IDX" = "cu92" ]; then
export CUDA_SHORT=9.2
export CUDA=9.2.148-1
export UBUNTU_VERSION=ubuntu1604
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "$IDX" = "cu101" ]; then
export IDX=cu101
export CUDA_SHORT=10.1
export CUDA=10.1.243-1
export UBUNTU_VERSION=ubuntu1804
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "$IDX" = "cu102" ]; then
export CUDA_SHORT=10.2
export CUDA=10.2.89-1
export UBUNTU_VERSION=ubuntu1804
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "$IDX" = "cu110" ]; then
export CUDA_SHORT=11.0
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "$IDX" = "cpu" ]; then
export TOOLKIT=cpuonly
fi
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "$IDX" = "cu92" ]; then
export CUDA_SHORT=9.2
export CUDA_URL=https://developer.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod2/local_installers2
export CUDA_FILE=cuda_${CUDA_SHORT}.148_win10
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "$IDX" = "cu101" ]; then
export CUDA_SHORT=10.1
export CUDA_URL=https://developer.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod/local_installers
export CUDA_FILE=cuda_${CUDA_SHORT}.105_418.96_win10.exe
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "$IDX" = "cu102" ]; then
export CUDA_SHORT=10.2
export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}/Prod/local_installers
export CUDA_FILE=cuda_${CUDA_SHORT}.89_441.22_win10.exe
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "$IDX" = "cu110" ]; then
export CUDA_SHORT=11.0
export CUDA_URL=https://developer.download.nvidia.com/compute/cuda/${CUDA_SHORT}.2/local_installers
export CUDA_FILE=cuda_${CUDA_SHORT}.2_451.48_win10.exe
export TOOLKIT="cudatoolkit=${CUDA_SHORT}"
fi
if [ "${TRAVIS_OS_NAME}" = "osx" ] && [ "$IDX" = "cpu" ]; then
export TOOLKIT=""
fi
if [ "${IDX}" = "cpu" ]; then
export FORCE_ONLY_CPU=1
else
export FORCE_CUDA=1
fi
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "${IDX}" != "cpu" ] && [ "${IDX}" != "cu110" ]; then
INSTALLER="cuda-repo-${UBUNTU_VERSION}_${CUDA}_amd64.deb"
wget -nv "http://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/${INSTALLER}"
sudo dpkg -i "${INSTALLER}"
wget -nv "https://developer.download.nvidia.com/compute/cuda/repos/${UBUNTU_VERSION}/x86_64/7fa2af80.pub"
sudo apt-key add 7fa2af80.pub
sudo apt update -qq
sudo apt install "cuda-core-${CUDA_SHORT/./-}" "cuda-nvcc-${CUDA_SHORT/./-}" "cuda-libraries-dev-${CUDA_SHORT/./-}"
sudo apt clean
CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}
nvcc --version
# Fix cublas on CUDA 10.1:
if [ -d "/usr/local/cuda-10.2/targets/x86_64-linux/include" ]; then
sudo cp -r /usr/local/cuda-10.2/targets/x86_64-linux/include/* "${CUDA_HOME}/include/"
fi
if [ -d "/usr/local/cuda-10.2/targets/x86_64-linux/lib" ]; then
sudo cp -r /usr/local/cuda-10.2/targets/x86_64-linux/lib/* "${CUDA_HOME}/lib/"
fi
fi
if [ "${TRAVIS_OS_NAME}" = "linux" ] && [ "${IDX}" = "cu110" ]; then
wget -nv https://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-ubuntu1804.pin
sudo mv cuda-ubuntu1804.pin /etc/apt/preferences.d/cuda-repository-pin-600
wget -nv https://developer.download.nvidia.com/compute/cuda/11.0.3/local_installers/cuda-repo-ubuntu1804-11-0-local_11.0.3-450.51.06-1_amd64.deb
sudo dpkg -i cuda-repo-ubuntu1804-11-0-local_11.0.3-450.51.06-1_amd64.deb
sudo apt-key add /var/cuda-repo-ubuntu1804-11-0-local/7fa2af80.pub
sudo apt update -qq
sudo apt install cuda-nvcc-11-0 cuda-libraries-dev-11-0
sudo apt clean
CUDA_HOME=/usr/local/cuda-${CUDA_SHORT}
LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH}
PATH=${CUDA_HOME}/bin:${PATH}
nvcc --version
fi
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "${IDX}" != "cpu" ]; then
# Install NVIDIA drivers, see:
# https://github.com/pytorch/vision/blob/master/packaging/windows/internal/cuda_install.bat#L99-L102
curl -k -L "https://drive.google.com/u/0/uc?id=1injUyo3lnarMgWyRcXqKg4UGnN0ysmuq&export=download" --output "/tmp/gpu_driver_dlls.zip"
7z x "/tmp/gpu_driver_dlls.zip" -o"/c/Windows/System32"
# Install CUDA:
wget -nv "${CUDA_URL}/${CUDA_FILE}"
PowerShell -Command "Start-Process -FilePath \"${CUDA_FILE}\" -ArgumentList \"-s nvcc_${CUDA_SHORT} cuobjdump_${CUDA_SHORT} nvprune_${CUDA_SHORT} cupti_${CUDA_SHORT} cublas_dev_${CUDA_SHORT} cudart_${CUDA_SHORT} cufft_dev_${CUDA_SHORT} curand_dev_${CUDA_SHORT} cusolver_dev_${CUDA_SHORT} cusparse_dev_${CUDA_SHORT} npp_dev_${CUDA_SHORT} nvrtc_dev_${CUDA_SHORT} nvml_dev_${CUDA_SHORT}\" -Wait -NoNewWindow"
CUDA_HOME=/c/Program\ Files/NVIDIA\ GPU\ Computing\ Toolkit/CUDA/v${CUDA_SHORT}
PATH=${CUDA_HOME}/bin:$PATH
PATH=/c/Program\ Files\ \(x86\)/Microsoft\ Visual\ Studio/2017/BuildTools/MSBuild/15.0/Bin:$PATH
nvcc --version
fi
#!/bin/bash
if [ "${TRAVIS_OS_NAME}" = "linux" ]; then
sudo add-apt-repository ppa:ubuntu-toolchain-r/test --yes
sudo apt update
sudo apt install gcc-7 g++-7 --yes
sudo update-alternatives --install /usr/bin/gcc gcc /usr/bin/gcc-7 60 \
--slave /usr/bin/g++ g++ /usr/bin/g++-7
sudo update-alternatives --config gcc
gcc --version
g++ --version
fi
#!/bin/bash
# https://github.com/pytorch/pytorch/commit/d2e16dd888a9b5fd55bd475d4fcffb70f388d4f0
if [ "${TRAVIS_OS_NAME}" = "windows" ]; then
echo "Fix nvcc for PyTorch"
sed -i.bak -e 's/CONSTEXPR_EXCEPT_WIN_CUDA/const/g' /c/tools/miniconda3/envs/test/lib/site-packages/torch/include/torch/csrc/jit/api/module.h
sed -i.bak -e 's/return \*(this->value)/return \*((type\*)this->value)/g' /c/tools/miniconda3/envs/test/lib/site-packages/torch/include/pybind11/cast.h
fi
if [ "${TRAVIS_OS_NAME}" = "windows" ] && [ "${TORCH_VERSION}" = "1.7.0" ]; then
echo "Fix nvcc for PyTorch 1.7.0"
sed -i.bak '/static constexpr Symbol Kind/d' /c/tools/miniconda3/envs/test/lib/site-packages/torch/include/torch/csrc/jit/ir/ir.h
fi
......@@ -2,12 +2,7 @@
description-file = README.md
[aliases]
test = pytest
test=pytest
[tool:pytest]
addopts = --cov
[egg_info]
tag_build =
tag_date = 0
......@@ -8,16 +8,14 @@ import torch
from torch.utils.cpp_extension import BuildExtension
from torch.utils.cpp_extension import CppExtension, CUDAExtension, CUDA_HOME
WITH_HIP = torch.cuda.is_available() and CUDA_HOME is not None
suffices = ['cpu', 'cuda'] if WITH_HIP else ['cpu']
WITH_CUDA = torch.cuda.is_available() and CUDA_HOME is not None
suffices = ['cpu', 'cuda'] if WITH_CUDA else ['cpu']
if os.getenv('FORCE_CUDA', '0') == '1':
suffices = ['cuda', 'cpu']
if os.getenv('FORCE_ONLY_HIP', '0') == '1':
suffices = ['hip']
if os.getenv('FORCE_ONLY_CUDA', '0') == '1':
suffices = ['cuda']
if os.getenv('FORCE_ONLY_CPU', '0') == '1':
suffices = ['cpu']
ROCM_PATH = os.getenv('ROCM_PATH')
HIPLIB = osp.join(ROCM_PATH, 'hipsparse', 'include')
BUILD_DOCS = os.getenv('BUILD_DOCS', '0') == '1'
......@@ -33,12 +31,12 @@ def get_extensions():
extra_compile_args = {'cxx': ['-O2']}
extra_link_args = ['-s']
if suffix == 'hip':
define_macros += [('WITH_HIP', None)]
hipcc_flags = os.getenv('HIPCC_FLAGS', '')
hipcc_flags = [] if hipcc_flags == '' else hipcc_flags.split(' ')
hipcc_flags += ['-arch=sm_35', '--expt-relaxed-constexpr', '-O2']
extra_compile_args['hipcc'] = hipcc_flags
if suffix == 'cuda':
define_macros += [('WITH_CUDA', None)]
nvcc_flags = os.getenv('NVCC_FLAGS', '')
nvcc_flags = [] if nvcc_flags == '' else nvcc_flags.split(' ')
nvcc_flags += ['-arch=sm_35', '--expt-relaxed-constexpr', '-O2']
extra_compile_args['nvcc'] = nvcc_flags
name = main.split(os.sep)[-1][:-4]
sources = [main]
......@@ -47,16 +45,15 @@ def get_extensions():
if osp.exists(path):
sources += [path]
path = osp.join(extensions_dir, 'hip', f'{name}_hip.hip')
if suffix == 'hip' and osp.exists(path):
path = osp.join(extensions_dir, 'cuda', f'{name}_cuda.cu')
if suffix == 'cuda' and osp.exists(path):
sources += [path]
Extension = CppExtension if suffix == 'cpu' else CUDAExtension
define_macros += [('TORCH_HIP_VERSION', 10000), ('__HIP__', None), ('__HCC__', None)]
extension = Extension(
f'torch_spline_conv._{name}_{suffix}',
sources,
include_dirs=[extensions_dir, HIPLIB],
include_dirs=[extensions_dir],
define_macros=define_macros,
extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args,
......
from itertools import product
import pytest
import torch
from torch_spline_conv import spline_basis
from .utils import dtypes, devices, tensor
tests = [{
'pseudo': [[0], [0.0625], [0.25], [0.75], [0.9375], [1]],
'kernel_size': [5],
'is_open_spline': [1],
'basis': [[1, 0], [0.75, 0.25], [1, 0], [1, 0], [0.25, 0.75], [1, 0]],
'weight_index': [[0, 1], [0, 1], [1, 2], [3, 4], [3, 4], [4, 0]],
}, {
'pseudo': [[0], [0.0625], [0.25], [0.75], [0.9375], [1]],
'kernel_size': [4],
'is_open_spline': [0],
'basis': [[1, 0], [0.75, 0.25], [1, 0], [1, 0], [0.25, 0.75], [1, 0]],
'weight_index': [[0, 1], [0, 1], [1, 2], [3, 0], [3, 0], [0, 1]],
}, {
'pseudo': [[0.125, 0.5], [0.5, 0.5], [0.75, 0.125]],
'kernel_size': [5, 5],
'is_open_spline': [1, 1],
'basis': [[0.5, 0.5, 0, 0], [1, 0, 0, 0], [0.5, 0, 0.5, 0]],
'weight_index': [[10, 11, 15, 16], [12, 13, 17, 18], [3, 4, 8, 9]]
}]
@pytest.mark.parametrize('test,dtype,device', product(tests, dtypes, devices))
def test_spline_basis_forward(test, dtype, device):
    pseudo = tensor(test['pseudo'], dtype, device)
    kernel_size = tensor(test['kernel_size'], torch.long, device)
    is_open_spline = tensor(test['is_open_spline'], torch.uint8, device)
    degree = 1
    basis, weight_index = spline_basis(pseudo, kernel_size, is_open_spline,
                                       degree)
    assert basis.tolist() == test['basis']
    assert weight_index.tolist() == test['weight_index']