Commit 5d2cafcb authored by Chao Liu's avatar Chao Liu
Browse files

clean up

parent eace3255
#!/bin/bash
rm -f CMakeCache.txt
rm -f *.cmake
rm -rf CMakeFiles
MY_PROJECT_SOURCE=/package/code/github/test_feature/SpMV
MY_PROJECT_INSTALL=../install.dir
cmake \
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
-D CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -std=c++11" \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-D BOOST_ROOT="/package/install/boost_1.66.0-mpich_3.2" \
-D CMAKE_CUDA_COMPILER="/package/install/cuda_9.0/bin/nvcc" \
-D CUDA_COMMON_INCLUDE_DIR="/package/code/github/test_feature/cuda_9.0_common/inc" \
-D CMAKE_CUDA_FLAGS="-ccbin g++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -arch=sm_35 -Xptxas -v -maxrregcount=40" \
${MY_PROJECT_SOURCE}
#-D CMAKE_CUDA_FLAGS="-lineinfo --source-in-ptx -keep -Xptxas -v -arch=sm_35 -Xptxas -v -maxrregcount=32" \
#-D CMAKE_CUDA_FLAGS="-G -lineinfo --source-in-ptx -keep -Xptxas -v -arch=sm_35" \
......@@ -5,13 +5,7 @@
#include "nvToolsExt.h"
#include "tensor.hpp"
#include "constant_tensor_descriptor.cuh"
#include "device_tensor_descriptor.cuh"
#if 0
#include "direct_convolution.cuh"
#else
#include "constant_direct_convolution.cuh"
#endif
template <class T>
struct GeneratorConstant
......@@ -116,7 +110,7 @@ void host_convolution(const Tensor<T>& in, const Tensor<T>& wei, Tensor<T>& out)
}
template <class T, class InDesc, class WeiDesc, class OutDesc>
void const_device_convolution(
void device_convolution(
InDesc, const Tensor<T>& in, WeiDesc, const Tensor<T>& wei, OutDesc, Tensor<T>& out)
{
std::size_t data_sz = sizeof(T);
......@@ -126,10 +120,6 @@ void const_device_convolution(
int num_thread = std::thread::hardware_concurrency();
#if 0
out.GenerateTensorValue(GeneratorConstant<float>{0}, num_thread);
#endif
in_device_buf.ToDevice(in.mData.data());
wei_device_buf.ToDevice(wei.mData.data());
out_device_buf.ToDevice(out.mData.data());
......@@ -147,13 +137,13 @@ void const_device_convolution(
constexpr unsigned CPerBlockLoop = 1;
constexpr unsigned OutTileSizeH = 2;
constexpr unsigned OutTileSizeW = 2;
constexpr unsigned YPerBlock = 16;
constexpr unsigned XPerBlock = 16;
constexpr unsigned YPerBlock = 4;
constexpr unsigned XPerBlock = 8;
constexpr unsigned NBlockCopyLen0 = 1;
constexpr unsigned NBlockCopyLen1 = 1;
constexpr unsigned NBlockCopyLen2 = 1;
constexpr unsigned NBlockCopyLen3 = 64;
constexpr unsigned NBlockCopyLen2 = 2;
constexpr unsigned NBlockCopyLen3 = 16;
constexpr unsigned nblock = (out_desc.GetLength(I0) / NPerBlock) *
(out_desc.GetLength(I1) / KPerBlock) *
......@@ -239,31 +229,23 @@ int main()
Tensor<float> wei(make_TensorDescriptor(wei_desc));
Tensor<float> out_host(make_TensorDescriptor(out_desc));
Tensor<float> out_device = out_host;
int num_thread = std::thread::hardware_concurrency();
#if 0
#if 1
in.GenerateTensorValue(GeneratorTensor<float>{}, num_thread);
wei.GenerateTensorValue(GeneratorTensor<float>{}, num_thread);
out_host.GenerateTensorValue(GeneratorConstant<float>{0}, num_thread);
#endif
#if 0
host_convolution(in, wei, out_host);
#endif
Tensor<float> out_device = out_host;
const_device_convolution(in_desc, in, wei_desc, wei, out_desc, out_device);
device_convolution(in_desc, in, wei_desc, wei, out_desc, out_device);
std::cout << __func__ << ": done" << std::endl;
#if 0
LogRange(std::cout << __func__ << "in : ", in.mData, ",") << std::endl;
LogRange(std::cout << __func__ << "wei: ", wei.mData, ",") << std::endl;
LogRange(std::cout, out_host.mData, ",") << std::endl;
LogRange(std::cout, out_device.mData, ",") << std::endl;
#endif
#if 1
host_convolution(in, wei, out_host);
#if 0
float error = 0;
float max_diff = 0;
float host_value = 0, device_value = 0;
......@@ -282,4 +264,11 @@ int main()
std::cout << "max_diff: " << max_diff << ", " << host_value << ", " << device_value
<< std::endl;
#endif
#if 0
LogRange(std::cout << __func__ << "in : ", in.mData, ",") << std::endl;
LogRange(std::cout << __func__ << "wei: ", wei.mData, ",") << std::endl;
LogRange(std::cout, out_host.mData, ",") << std::endl;
LogRange(std::cout, out_device.mData, ",") << std::endl;
#endif
}
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment