"git@developer.sourcefind.cn:modelzoo/resnet50_tensorflow.git" did not exist on "9a833e2c455b7cb19dbed7fd2b527270da82e3c2"
Commit dab29389 authored by Chao Liu
Browse files

tested on P100

parent 85ae70d3
...@@ -597,7 +597,7 @@ int main(int argc, char* argv[]) ...@@ -597,7 +597,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 0
// 1x1 filter, 8x8 image // 1x1 filter, 8x8 image
// cuDNN 68%, ck:nvidia: 72.6%, ck:amd 34% // cudnn@V100 68%, ck@V100 72%, ck@P100 52%, ck@MI50 34%
constexpr index_t N = 64; constexpr index_t N = 64;
constexpr index_t C = 1536; constexpr index_t C = 1536;
constexpr index_t HI = 8; constexpr index_t HI = 8;
...@@ -613,7 +613,7 @@ int main(int argc, char* argv[]) ...@@ -613,7 +613,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 0
// 1x1 filter, 8x8 image // 1x1 filter, 8x8 image
// cuDNN 77%, ck:nvidia 76.4%, ck:amd 47% // cudnn@V100 77%, ck@V100 76%, ck@P100 79%, ck@MI50 47%
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 2048; constexpr index_t C = 2048;
constexpr index_t HI = 8; constexpr index_t HI = 8;
...@@ -629,7 +629,7 @@ int main(int argc, char* argv[]) ...@@ -629,7 +629,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 0
// 1x1 filter, 7x7 image // 1x1 filter, 7x7 image
// cuDNN 82%, ck:nvidia 76.6%, ck:amd 54% // cudnn@V100 82%, ck@V100 76%, ck@P100 67%, ck@MI50 54%
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 832; constexpr index_t C = 832;
constexpr index_t HI = 7; constexpr index_t HI = 7;
...@@ -645,7 +645,7 @@ int main(int argc, char* argv[]) ...@@ -645,7 +645,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 0
// 1x1 filter, 8x8 image // 1x1 filter, 8x8 image
// cuDNN 83%, ck:nvidia 75.4%, ck:amd 58% // cudnn@V100 83%, ck@V100 75%, ck@P100 78%, ck@MI50 58%
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 1280; constexpr index_t C = 1280;
constexpr index_t HI = 8; constexpr index_t HI = 8;
...@@ -659,9 +659,9 @@ int main(int argc, char* argv[]) ...@@ -659,9 +659,9 @@ int main(int argc, char* argv[])
constexpr index_t HPad = 0; constexpr index_t HPad = 0;
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 1
// 1x1 filter, 14x14 image // 1x1 filter, 14x14 image
// cuDNN 62%, ck:nvidia 68.4%, ck:amd 44% // cudnn@V100 62%, ck@V100 68%, ck@P100 70%, ck@MI50 44%
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 512; constexpr index_t C = 512;
constexpr index_t HI = 14; constexpr index_t HI = 14;
...@@ -677,7 +677,7 @@ int main(int argc, char* argv[]) ...@@ -677,7 +677,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 0
// 1x1 filter, 8x8 image // 1x1 filter, 8x8 image
// cuDNN 74%, ck:nvidia 57.1%, ck:amd 52% // cudnn@V100 74%, ck@V100 57%, ck@P100 78%, ck@MI50 52%
constexpr index_t N = 64; constexpr index_t N = 64;
constexpr index_t C = 1536; constexpr index_t C = 1536;
constexpr index_t HI = 8; constexpr index_t HI = 8;
...@@ -693,7 +693,7 @@ int main(int argc, char* argv[]) ...@@ -693,7 +693,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 0
// 1x1 filter, 28x28 image // 1x1 filter, 28x28 image
// cuDNN 86%, ck:nvidia 84.6%, ck:amd 64% // cudnn@V100 86%, ck@V100 84%, ck@P100 80%, ck@MI50 64%
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 256; constexpr index_t C = 256;
constexpr index_t HI = 28; constexpr index_t HI = 28;
...@@ -709,7 +709,7 @@ int main(int argc, char* argv[]) ...@@ -709,7 +709,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 0
// 1x1 filter, 7x7 image // 1x1 filter, 7x7 image
// cuDNN 71%, ck:55.9%, ck:amd 54% // cudnn@V100 71%, ck@V100 55%, ck@P100 70%, ck@MI50 54%
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 832; constexpr index_t C = 832;
constexpr index_t HI = 7; constexpr index_t HI = 7;
...@@ -725,7 +725,7 @@ int main(int argc, char* argv[]) ...@@ -725,7 +725,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 0
// 3x3 filter, 2x2 stride, 35x35 input, 17x17 output // 3x3 filter, 2x2 stride, 35x35 input, 17x17 output
// cuDNN 90%, ck:nvidia 93%, ck:amd 73% // cudnn@V100 90%, ck@V100 93%, ck@P100 83%, ck@MI50 73%
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 288; constexpr index_t C = 288;
constexpr index_t HI = 35; constexpr index_t HI = 35;
...@@ -741,7 +741,7 @@ int main(int argc, char* argv[]) ...@@ -741,7 +741,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 0
// 1x1 filter, 17x17 input // 1x1 filter, 17x17 input
// cuDNN 81%, ck:nvidia 76.8%, ck:amd 66% // cudnn@V100 81%, ck@V100 76%, ck@P100 70%, ck@MI50 66%
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 768; constexpr index_t C = 768;
constexpr index_t HI = 17; constexpr index_t HI = 17;
...@@ -755,9 +755,9 @@ int main(int argc, char* argv[]) ...@@ -755,9 +755,9 @@ int main(int argc, char* argv[])
constexpr index_t HPad = 0; constexpr index_t HPad = 0;
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 1 #elif 0
// 1x1 filter, 14x14 image // 1x1 filter, 14x14 image
// cuDNN 73%, ck:nvidia 72.7%, ck:amd 65% // cudnn@V100 73%, ck@V100 71%, ck@P100 70%, ck@MI50 65%
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 528; constexpr index_t C = 528;
constexpr index_t HI = 14; constexpr index_t HI = 14;
...@@ -773,7 +773,7 @@ int main(int argc, char* argv[]) ...@@ -773,7 +773,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 0
// 1x1 filter, 14x14 image // 1x1 filter, 14x14 image
// cuDNN 73%, ck:nvidia 72.7%, ck:amd 65% // cudnn@V100 73%, ck@V100 72%, ck@P100 79%, ck@MI50 65%
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 528; constexpr index_t C = 528;
constexpr index_t HI = 14; constexpr index_t HI = 14;
...@@ -789,7 +789,7 @@ int main(int argc, char* argv[]) ...@@ -789,7 +789,7 @@ int main(int argc, char* argv[])
constexpr index_t WPad = 0; constexpr index_t WPad = 0;
#elif 0 #elif 0
// 1x1 filter, 7x7 image // 1x1 filter, 7x7 image
// cuDNN 49%, ck:nvidia 52.8%, ck:amd 45% // cudnn@V100 49%, ck@V100 50%, ck@P100 61%, ck@MI50 45%
constexpr index_t N = 128; constexpr index_t N = 128;
constexpr index_t C = 832; constexpr index_t C = 832;
constexpr index_t HI = 7; constexpr index_t HI = 7;
......
#!/bin/bash
# Configure script: wipes any previous CMake configuration in the current
# directory and re-runs cmake for the NVIDIA backend, generating device code
# for compute capability 6.0 (sm_60) and 7.0 (sm_70).

# Remove stale CMake state so the configuration below starts from scratch.
rm -f CMakeCache.txt
rm -f *.cmake
rm -rf CMakeFiles

# Source tree and install prefix, relative to the directory this is run from.
MY_PROJECT_SOURCE=../../../
MY_PROJECT_INSTALL=../install.dir

export CUDA_ROOT=/usr/local/cuda

# Append the CUDA directories to the search paths. The ${VAR:+${VAR}:} form
# emits the "<old value>:" prefix only when VAR is already non-empty; a plain
# "$VAR:..." would leave a leading empty element when VAR is unset, and an
# empty element is interpreted as the current working directory by GCC (CPATH)
# and by the dynamic loader (LD_LIBRARY_PATH).
export CPATH="${CPATH:+${CPATH}:}${CUDA_ROOT}/include"
export LIBRARY_PATH="${LIBRARY_PATH:+${LIBRARY_PATH}:}${CUDA_ROOT}/lib64"
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}${CUDA_ROOT}/lib64"

# Release build with verbose makefiles; nvcc uses clang++-6.0 as host compiler
# and keeps intermediate files / line info / ptxas statistics for inspection.
cmake \
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL} \
-D CMAKE_CXX_COMPILER=clang++-6.0 \
-D CMAKE_BUILD_TYPE=Release \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON \
-D DEVICE_BACKEND=NVIDIA \
-D CUDA_COMMON_INCLUDE_DIR="/root/workspace/NVIDIA_CUDA-10.1_Samples/common/inc" \
-D CMAKE_CUDA_FLAGS="-ccbin clang++-6.0 -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_60,code=sm_60 -Xptxas -v -gencode=arch=compute_70,code=sm_70" \
${MY_PROJECT_SOURCE}

# Alternative settings kept for reference (not active): an explicit nvcc path,
# and CMAKE_CUDA_FLAGS variants targeting sm_61 with or without sm_70 and with
# or without a 128-register cap.
#-D CMAKE_CUDA_COMPILER="/package/install/cuda_10.0/bin/nvcc" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -Xptxas -v -maxrregcount=128" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -gencode=arch=compute_70,code=sm_70" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -gencode=arch=compute_70,code=sm_70 -Xptxas -v -maxrregcount=128" \
cuobjdump -xelf all ./driver/driver && nvdisasm --print-code -g driver.sm_61.cubin > driver.sm_61.asm && nvdisasm --print-code -g driver.sm_70.cubin > driver.sm_70.asm cuobjdump -xelf sm_60 ./driver/driver && nvdisasm --print-code -g driver.sm_60.cubin > driver.sm_60.asm
cuobjdump -xelf sm_61 ./driver/driver && nvdisasm --print-code -g driver.sm_61.cubin > driver.sm_61.asm
cuobjdump -xelf sm_70 ./driver/driver && nvdisasm --print-code -g driver.sm_70.cubin > driver.sm_70.asm
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment