Commit dab29389 authored by Chao Liu
Browse files

tested on P100

parent 85ae70d3
......@@ -597,7 +597,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0;
#elif 0
// 1x1 filter, 8x8 image
// cuDNN 68%, ck:nvidia: 72.6%, ck:amd 34%
// cudnn@V100 68%, ck@V100 72%, ck@P100 52%, ck@MI50 34%
constexpr index_t N = 64;
constexpr index_t C = 1536;
constexpr index_t HI = 8;
......@@ -613,7 +613,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0;
#elif 0
// 1x1 filter, 8x8 image
// cuDNN 77%, ck:nvidia 76.4%, ck:amd 47%
// cudnn@V100 77%, ck@V100 76%, ck@P100 79%, ck@MI50 47%
constexpr index_t N = 128;
constexpr index_t C = 2048;
constexpr index_t HI = 8;
......@@ -629,7 +629,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0;
#elif 0
// 1x1 filter, 7x7 image
// cuDNN 82%, ck:nvidia 76.6%, ck:amd 54%
// cudnn@V100 82%, ck@V100 76%, ck@P100 67%, ck@MI50 54%
constexpr index_t N = 128;
constexpr index_t C = 832;
constexpr index_t HI = 7;
......@@ -645,7 +645,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0;
#elif 0
// 1x1 filter, 8x8 image
// cuDNN 83%, ck:nvidia 75.4%, ck:amd 58%
// cudnn@V100 83%, ck@V100 75%, ck@P100 78%, ck@MI50 58%
constexpr index_t N = 128;
constexpr index_t C = 1280;
constexpr index_t HI = 8;
......@@ -659,9 +659,9 @@ int main(int argc, char* argv[])
constexpr index_t HPad = 0;
constexpr index_t WPad = 0;
#elif 0
#elif 1
// 1x1 filter, 14x14 image
// cuDNN 62%, ck:nvidia 68.4%, ck:amd 44%
// cudnn@V100 62%, ck@V100 68%, ck@P100 70%, ck@MI50 44%
constexpr index_t N = 128;
constexpr index_t C = 512;
constexpr index_t HI = 14;
......@@ -677,7 +677,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0;
#elif 0
// 1x1 filter, 8x8 image
// cuDNN 74%, ck:nvidia 57.1%, ck:amd 52%
// cudnn@V100 74%, ck@V100 57%, ck@P100 78%, ck@MI50 52%
constexpr index_t N = 64;
constexpr index_t C = 1536;
constexpr index_t HI = 8;
......@@ -693,7 +693,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0;
#elif 0
// 1x1 filter, 28x28 image
// cuDNN 86%, ck:nvidia 84.6%, ck:amd 64%
// cudnn@V100 86%, ck@V100 84%, ck@P100 80%, ck@MI50 64%
constexpr index_t N = 128;
constexpr index_t C = 256;
constexpr index_t HI = 28;
......@@ -709,7 +709,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0;
#elif 0
// 1x1 filter, 7x7 image
// cuDNN 71%, ck:55.9%, ck:amd 54%
// cudnn@V100 71%, ck@V100 55%, ck@P100 70%, ck@MI50 54%
constexpr index_t N = 128;
constexpr index_t C = 832;
constexpr index_t HI = 7;
......@@ -725,7 +725,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0;
#elif 0
// 3x3 filter, 2x2 stride, 35x35 input, 17x17 output
// cuDNN 90%, ck:nvidia 93%, ck:amd 73%
// cudnn@V100 90%, ck@V100 93%, ck@P100 83%, ck@MI50 73%
constexpr index_t N = 128;
constexpr index_t C = 288;
constexpr index_t HI = 35;
......@@ -741,7 +741,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0;
#elif 0
// 1x1 filter, 17x17 input
// cuDNN 81%, ck:nvidia 76.8%, ck:amd 66%
// cudnn@V100 81%, ck@V100 76%, ck@P100 70%, ck@MI50 66%
constexpr index_t N = 128;
constexpr index_t C = 768;
constexpr index_t HI = 17;
......@@ -755,9 +755,9 @@ int main(int argc, char* argv[])
constexpr index_t HPad = 0;
constexpr index_t WPad = 0;
#elif 1
#elif 0
// 1x1 filter, 14x14 image
// cuDNN 73%, ck:nvidia 72.7%, ck:amd 65%
// cudnn@V100 73%, ck@V100 71%, ck@P100 70%, ck@MI50 65%
constexpr index_t N = 128;
constexpr index_t C = 528;
constexpr index_t HI = 14;
......@@ -773,7 +773,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0;
#elif 0
// 1x1 filter, 14x14 image
// cuDNN 73%, ck:nvidia 72.7%, ck:amd 65%
// cudnn@V100 73%, ck@V100 72%, ck@P100 79%, ck@MI50 65%
constexpr index_t N = 128;
constexpr index_t C = 528;
constexpr index_t HI = 14;
......@@ -789,7 +789,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0;
#elif 0
// 1x1 filter, 7x7 image
// cuDNN 49%, ck:nvidia 52.8%, ck:amd 45%
// cudnn@V100 49%, ck@V100 50%, ck@P100 61%, ck@MI50 45%
constexpr index_t N = 128;
constexpr index_t C = 832;
constexpr index_t HI = 7;
......
#!/bin/bash
# Configures a CMake build of the project with DEVICE_BACKEND=NVIDIA.
# Intended to be run from a build directory located three levels below the
# source tree (see MY_PROJECT_SOURCE).
#
# Remove the results of any previous CMake configure in the current directory
# so that cached settings do not leak into this run.
rm -f CMakeCache.txt
rm -f *.cmake
rm -rf CMakeFiles
# Source tree and install prefix, both relative to the current directory.
MY_PROJECT_SOURCE=../../../
MY_PROJECT_INSTALL=../install.dir
# Append the CUDA toolkit's include and lib64 directories to the header,
# link-time and run-time library search paths.
# NOTE(review): when one of these variables is unset, the resulting value
# starts with ':' (an empty entry), which toolchains may interpret as the
# current directory -- confirm this is acceptable.
export CUDA_ROOT=/usr/local/cuda
export CPATH=$CPATH:$CUDA_ROOT/include
export LIBRARY_PATH=$LIBRARY_PATH:$CUDA_ROOT/lib64
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$CUDA_ROOT/lib64
# Run CMake: Release build with verbose makefiles, clang++-6.0 as both the
# C++ compiler and the CUDA host compiler (-ccbin), and device code generated
# for sm_60 and sm_70. The CUDA flags also keep intermediate files (-keep),
# embed source in the PTX (--source-in-ptx) and make ptxas verbose (-Xptxas -v).
cmake \
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
-D CMAKE_CXX_COMPILER=clang++-6.0 \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-D DEVICE_BACKEND=NVIDIA \
-D CUDA_COMMON_INCLUDE_DIR="/root/workspace/NVIDIA_CUDA-10.1_Samples/common/inc" \
-D CMAKE_CUDA_FLAGS="-ccbin clang++-6.0 -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_60,code=sm_60 -Xptxas -v -gencode=arch=compute_70,code=sm_70" \
${MY_PROJECT_SOURCE}
# Disabled alternative settings kept for reference: an explicit nvcc path and
# CUDA flag sets targeting sm_61 (optionally with sm_70 and/or -maxrregcount=128).
#-D CMAKE_CUDA_COMPILER="/package/install/cuda_10.0/bin/nvcc" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -Xptxas -v -maxrregcount=128" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -gencode=arch=compute_70,code=sm_70" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -gencode=arch=compute_70,code=sm_70 -Xptxas -v -maxrregcount=128" \
cuobjdump -xelf all ./driver/driver && nvdisasm --print-code -g driver.sm_61.cubin > driver.sm_61.asm && nvdisasm --print-code -g driver.sm_70.cubin > driver.sm_70.asm
cuobjdump -xelf sm_60 ./driver/driver && nvdisasm --print-code -g driver.sm_60.cubin > driver.sm_60.asm
cuobjdump -xelf sm_61 ./driver/driver && nvdisasm --print-code -g driver.sm_61.cubin > driver.sm_61.asm
cuobjdump -xelf sm_70 ./driver/driver && nvdisasm --print-code -g driver.sm_70.cubin > driver.sm_70.asm
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment