Commit ce8a91e4 authored by yan.yan's avatar yan.yan
Browse files

add libspconv example

parent 838aa644
......@@ -110,4 +110,8 @@ venv.bak/
__version__.py
wheelhouse_tmp
\ No newline at end of file
wheelhouse_tmp
example/libspconv/cumm
example/libspconv/spconv/include
example/libspconv/spconv/src
\ No newline at end of file
......@@ -72,6 +72,7 @@ Check [spconv 2.x algorithm introduction](docs/spconv2_algo.pdf) to understand s
* greatly faster int8 kernels (~1.2x~2.7x) in ampere GPUs (tested in RTX 3090)
* no python 3.6 support
* nvrtc support: kernel in old GPUs will be compiled in runtime.
* [libspconv](docs/PURE_CPP_BUILD.md): pure C++ build of all spconv ops. See the [example](example/libspconv/run_build.sh).
## Usage
......
# Pure C++ build
Spconv provides a way to generate the sources for building a C++ library that contains all ops needed for inference and training.
## Steps
1. Install spconv and cumm, then make cumm available to CMake: either install cumm's CMake package or add cumm as a subdirectory.
CMake install: clone the cumm project, then run ```mkdir -p build && cd build && cmake .. && make && make install```
Subdirectory: clone the cumm project and copy it into your parent project.
2. Prepare the CMake list files, set some environment variables, then generate the code.
* Set Envs
```Bash
export CUMM_CUDA_VERSION=11.4 # cuda version, required
export CUMM_DISABLE_JIT=1
export SPCONV_DISABLE_JIT=1
export CUMM_INCLUDE_PATH="\${CUMM_INCLUDE_PATH}" # if you use cumm as a subdirectory, you need this to find cumm includes.
export CUMM_CUDA_ARCH_LIST="6.1;7.5;8.6" # cuda arch flags
```
* Generate code without training ops (inference only):
```Bash
python -m spconv.gencode --include=/path/to/spconv/include --src=/path/to/spconv/src --inference_only=True
```
* Generate code including training ops:
```Bash
python -m spconv.gencode --include=/path/to/spconv/include --src=/path/to/spconv/src
```
3. Run the CMake build.
## Example
See example/libspconv/README.md.
# Top-level build for the libspconv example: adds cumm and the generated
# spconv sources as subprojects, then links a small demo executable.
#
# 3.20 matches the minimum declared by spconv/CMakeLists.txt, so an
# unsupported CMake is rejected here instead of midway through
# add_subdirectory(spconv).
cmake_minimum_required(VERSION 3.20 FATAL_ERROR)
project(SpconvExample LANGUAGES CXX CUDA VERSION 0.1)
# cumm is consumed as a subdirectory here. FORCE overwrites any value already
# stored in an existing CMakeCache.txt.
set(CUMM_DISABLE_CMAKE_INSTALL ON CACHE BOOL "Disable cumm CMake install (cumm is used as a subdirectory)" FORCE)
add_subdirectory(cumm)
add_subdirectory(spconv)
add_executable(main main.cc)
# SPCONV2_INCLUDE_PATH is exported to this scope by spconv/CMakeLists.txt.
target_include_directories(main PRIVATE ${SPCONV2_INCLUDE_PATH})
# PRIVATE: main is an executable, nothing links against it.
target_link_libraries(main PRIVATE spconv cumm::cumm)
## libspconv Example
Run ```run_build.sh``` to get ```libspconv.so```.
\ No newline at end of file
// Minimal libspconv example: includes the generated spconv/cumm headers,
// names the main op classes through aliases, and prints a greeting.
#include <spconvlib/cumm/conv/main/ConvMainUnitTest.h>
#include <spconvlib/cumm/gemm/main/GemmMainUnitTest.h>
#include <spconvlib/spconv/csrc/sparse/all/SpconvOps.h>
#include <spconvlib/spconv/csrc/sparse/alloc/StaticAllocator.h>
#include <spconvlib/spconv/csrc/sparse/convops/spops/ConvGemmOps.h>
#include <spconvlib/spconv/csrc/sparse/inference/InferenceOps.h>
#include <spconvlib/spconv/csrc/sparse/convops/SimpleExternalSpconvMatmul.h>
#include <spconvlib/spconv/csrc/sparse/convops/gemmops/GemmTunerSimple.h>

// None of these aliases is used by main() below.
// NOTE(review): presumably they exist so that a successful compile proves the
// generated headers and class names resolve - confirm before removing any.
using StaticAllocator = spconvlib::spconv::csrc::sparse::alloc::StaticAllocator;
using SpconvOps = spconvlib::spconv::csrc::sparse::all::SpconvOps;
using ConvMain = spconvlib::cumm::conv::main::ConvMainUnitTest;
using ConvTunerSimple =
    spconvlib::spconv::csrc::sparse::convops::spops::ConvTuner;
using ConvGemmOps =
    spconvlib::spconv::csrc::sparse::convops::spops::ConvGemmOps;
using SimpleExternalSpconvMatmul =
    spconvlib::spconv::csrc::sparse::convops::SimpleExternalSpconvMatmul;
using InferenceOps =
    spconvlib::spconv::csrc::sparse::inference::InferenceOps;

/// Entry point: prints a greeting and exits with status 0.
int main() {
  // tv::ssprint is not declared in this file; it is expected to arrive
  // transitively through the spconvlib headers above.
  tv::ssprint("Hello libspconv!!!");
  return 0;
}
\ No newline at end of file
#!/usr/bin/env bash
# Builds the libspconv example: fetches cumm, generates the spconv C++ sources
# and runs the CMake build inside this directory.

# Stop at the first failing step instead of building from a half-prepared tree.
set -euo pipefail

# Directory containing this script, so it can be launched from anywhere.
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Reuse an existing checkout so the script can be re-run; remove
# $SCRIPT_DIR/cumm first to get a fresh clone.
if [ ! -e "$SCRIPT_DIR/cumm/CMakeLists.txt" ]; then
    git clone https://github.com/FindDefinition/cumm.git "$SCRIPT_DIR/cumm"
fi

export CUMM_CUDA_VERSION=11.4 # cuda version, required
export CUMM_DISABLE_JIT=1
export SPCONV_DISABLE_JIT=1
# The backslash keeps the value as the literal text ${CUMM_INCLUDE_PATH}.
# NOTE(review): presumably so the generated CMake files reference it as a
# CMake variable - confirm before "fixing" the escape.
export CUMM_INCLUDE_PATH="\${CUMM_INCLUDE_PATH}" # if you use cumm as a subdirectory, you need this to find cumm includes.
export CUMM_CUDA_ARCH_LIST="7.5;8.6" # cuda arch flags

# Generate the inference-only sources into spconv/include and spconv/src.
python -m spconv.gencode --include="$SCRIPT_DIR/spconv/include" --src="$SCRIPT_DIR/spconv/src" --inference_only=True

mkdir -p "$SCRIPT_DIR/build"
cd "$SCRIPT_DIR/build"
cmake ..
cmake --build "$SCRIPT_DIR/build" --config Release -j 8
# Build script for the spconv library: headers are taken from include/,
# and the src/ subdirectory is added as part of this project.
cmake_minimum_required(VERSION 3.20)
project(spconv LANGUAGES CXX CUDA)

# Directory-wide include path; set before add_subdirectory(src) so that
# directory inherits it.
include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include")
add_subdirectory(src)

# Publish the header location to the including (parent) project.
set(SPCONV2_INCLUDE_PATH "${spconv_SOURCE_DIR}/include" PARENT_SCOPE)

# The spconv target is not created in this file (presumably in src/).
install(
    TARGETS spconv
    ARCHIVE DESTINATION lib
    LIBRARY DESTINATION lib
    RUNTIME DESTINATION bin
)
<!--
Copyright 2022 Yan Yan
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
-->
TODO
\ No newline at end of file
......@@ -72,6 +72,7 @@ class InferenceOpsKernel(pccm.ParameterizedClass):
namespace op = tv::arrayops;
using nv_scalar_t = tv::equivalent_data_type_t<T>;
using MathOp = op::MathScalarOp<nv_scalar_t>;
for (int i : tv::KernelLoopY<int>(size, block_idx_y, OneDim ? num_blocks_y : gridDim.y)) {{
auto out_ptr = out_features + i * num_features;
for (int j : tv::KernelLoopX<int>(num_features, block_idx_x, OneDim ? num_blocks_x : gridDim.x)) {{
......@@ -89,7 +90,7 @@ class InferenceOpsKernel(pccm.ParameterizedClass):
break;
}}
case tv::gemm::Activation::kSigmoid:{{
auto e = op::MathScalarOp<nv_scalar_t>::exp(-*o_nv);
auto e = MathOp::exp(MathOp::neg(*o_nv));
o = T(1) / (T(1) + *reinterpret_cast<T*>( &e ));
break;
}}
......@@ -115,6 +116,8 @@ class InferenceOpsKernel(pccm.ParameterizedClass):
code.raw(f"""
namespace op = tv::arrayops;
using nv_scalar_t = tv::equivalent_data_type_t<T>;
using MathOp = op::MathScalarOp<nv_scalar_t>;
for (int i : tv::KernelLoopX<int>(size)) {{
T o = out_features[i];
auto* o_nv = reinterpret_cast<nv_scalar_t*>(&o);
......@@ -131,7 +134,7 @@ class InferenceOpsKernel(pccm.ParameterizedClass):
break;
}}
case tv::gemm::Activation::kSigmoid:{{
auto e = op::MathScalarOp<nv_scalar_t>::exp(-*o_nv);
auto e = MathOp::exp(MathOp::neg(*o_nv));
out_features[i] = T(1) / (T(1) + *reinterpret_cast<T*>( &e ));
break;
}}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment