Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
dab29389
Commit
dab29389
authored
Jun 27, 2019
by
Chao Liu
Browse files
tested on P100
parent
85ae70d3
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
48 additions
and
16 deletions
+48
-16
driver/src/driver.cpp
driver/src/driver.cpp
+15
-15
script/cmake-cuda_docker.sh
script/cmake-cuda_docker.sh
+30
-0
script/extract_asm-cuda.sh
script/extract_asm-cuda.sh
+3
-1
No files found.
driver/src/driver.cpp
View file @
dab29389
...
...
@@ -597,7 +597,7 @@ int main(int argc, char* argv[])
constexpr
index_t
WPad
=
0
;
#elif 0
// 1x1 filter, 8x8 image
// cu
DNN 68%, ck:nvidia:
72
.6
%, ck
:amd
34%
// cu
dnn@V100 68%, ck@V100
72%, ck
@P100 52%, ck@MI50
34%
constexpr
index_t
N
=
64
;
constexpr
index_t
C
=
1536
;
constexpr
index_t
HI
=
8
;
...
...
@@ -613,7 +613,7 @@ int main(int argc, char* argv[])
constexpr
index_t
WPad
=
0
;
#elif 0
// 1x1 filter, 8x8 image
// cu
DNN 77%, ck:nvidia 76.4%, ck:amd
47%
// cu
dnn@V100 77%, ck@V100 76%, ck@P100 79%, ck@MI50
47%
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
2048
;
constexpr
index_t
HI
=
8
;
...
...
@@ -629,7 +629,7 @@ int main(int argc, char* argv[])
constexpr
index_t
WPad
=
0
;
#elif 0
// 1x1 filter, 7x7 image
// cu
DNN 82%, ck:nvidia 76.6%, ck:amd
54%
// cu
dnn@V100 82%, ck@V100 76%, ck@P100 67%, ck@MI50
54%
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
832
;
constexpr
index_t
HI
=
7
;
...
...
@@ -645,7 +645,7 @@ int main(int argc, char* argv[])
constexpr
index_t
WPad
=
0
;
#elif 0
// 1x1 filter, 8x8 image
// cu
DNN 83%, ck:nvidia 75.4%, ck:amd
58%
// cu
dnn@V100 83%, ck@V100 75%, ck@P100 78%, ck@MI50
58%
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
1280
;
constexpr
index_t
HI
=
8
;
...
...
@@ -659,9 +659,9 @@ int main(int argc, char* argv[])
constexpr
index_t
HPad
=
0
;
constexpr
index_t
WPad
=
0
;
#elif
0
#elif
1
// 1x1 filter, 14x14 image
// cu
DNN 62%, ck:nvidia 68.4%, ck:amd
44%
// cu
dnn@V100 62%, ck@V100 68%, ck@P100 70%, ck@MI50
44%
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
512
;
constexpr
index_t
HI
=
14
;
...
...
@@ -677,7 +677,7 @@ int main(int argc, char* argv[])
constexpr
index_t
WPad
=
0
;
#elif 0
// 1x1 filter, 8x8 image
// cu
DNN 74%, ck:nvidia 57.1%, ck:amd
52%
// cu
dnn@V100 74%, ck@V100 57%, ck@P100 78%, ck@MI50
52%
constexpr
index_t
N
=
64
;
constexpr
index_t
C
=
1536
;
constexpr
index_t
HI
=
8
;
...
...
@@ -693,7 +693,7 @@ int main(int argc, char* argv[])
constexpr
index_t
WPad
=
0
;
#elif 0
// 1x1 filter, 28x28 image
// cu
DNN 86%, ck:nvidia 84.6%, ck:amd
64%
// cu
dnn@V100 86%, ck@V100 84%, ck@P100 80%, ck@MI50
64%
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
256
;
constexpr
index_t
HI
=
28
;
...
...
@@ -709,7 +709,7 @@ int main(int argc, char* argv[])
constexpr
index_t
WPad
=
0
;
#elif 0
// 1x1 filter, 7x7 image
// cu
DNN 71%, ck:55.9%, ck:amd
54%
// cu
dnn@V100 71%, ck@V100 55%, ck@P100 70%, ck@MI50
54%
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
832
;
constexpr
index_t
HI
=
7
;
...
...
@@ -725,7 +725,7 @@ int main(int argc, char* argv[])
constexpr
index_t
WPad
=
0
;
#elif 0
// 3x3 filter, 2x2 stride, 35x35 input, 17x17 output
// cu
DNN
90%, ck
:nvidia
93%, ck
:amd
73%
// cu
dnn@V100
90%, ck
@V100
93%, ck
@P100 83%, ck@MI50
73%
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
288
;
constexpr
index_t
HI
=
35
;
...
...
@@ -741,7 +741,7 @@ int main(int argc, char* argv[])
constexpr
index_t
WPad
=
0
;
#elif 0
// 1x1 filter, 17x17 input
// cu
DNN 81%, ck:nvidia 76.8%, ck:amd
66%
// cu
dnn@V100 81%, ck@V100 76%, ck@P100 70%, ck@MI50
66%
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
768
;
constexpr
index_t
HI
=
17
;
...
...
@@ -755,9 +755,9 @@ int main(int argc, char* argv[])
constexpr
index_t
HPad
=
0
;
constexpr
index_t
WPad
=
0
;
#elif
1
#elif
0
// 1x1 filter, 14x14 image
// cu
DNN 73%, ck:nvidia 72.7%, ck:amd
65%
// cu
dnn@V100 73%, ck@V100 71%, ck@P100 70%, ck@MI50
65%
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
528
;
constexpr
index_t
HI
=
14
;
...
...
@@ -773,7 +773,7 @@ int main(int argc, char* argv[])
constexpr
index_t
WPad
=
0
;
#elif 0
// 1x1 filter, 14x14 image
// cu
DNN 73%, ck:nvidia 72.7%, ck:amd
65%
// cu
dnn@V100 73%, ck@V100 72%, ck@P100 79%, ck@MI50
65%
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
528
;
constexpr
index_t
HI
=
14
;
...
...
@@ -789,7 +789,7 @@ int main(int argc, char* argv[])
constexpr
index_t
WPad
=
0
;
#elif 0
// 1x1 filter, 7x7 image
// cu
DNN 49%, ck:nvidia 52.8%, ck:amd
45%
// cu
dnn@V100 49%, ck@V100 50%, ck@P100 61%, ck@MI50
45%
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
832
;
constexpr
index_t
HI
=
7
;
...
...
script/cmake-cuda_docker.sh
0 → 100755
View file @
dab29389
#!/bin/bash
rm
-f
CMakeCache.txt
rm
-f
*
.cmake
rm
-rf
CMakeFiles
MY_PROJECT_SOURCE
=
../../../
MY_PROJECT_INSTALL
=
../install.dir
export
CUDA_ROOT
=
/usr/local/cuda
export
CPATH
=
$CPATH
:
$CUDA_ROOT
/include
export
LIBRARY_PATH
=
$LIBRARY_PATH
:
$CUDA_ROOT
/lib64
export
LD_LIBRARY_PATH
=
$LD_LIBRARY_PATH
:
$CUDA_ROOT
/lib64
cmake
\
-D
CMAKE_INSTALL_PREFIX
=
${
MY_PROJECT_INSTALL
}
\
-D
CMAKE_CXX_COMPILER
=
clang++-6.0
\
-D
CMAKE_BUILD_TYPE
=
Release
\
-D
CMAKE_VERBOSE_MAKEFILE:BOOL
=
ON
\
-D
DEVICE_BACKEND
=
NVIDIA
\
-D
CUDA_COMMON_INCLUDE_DIR
=
"/root/workspace/NVIDIA_CUDA-10.1_Samples/common/inc"
\
-D
CMAKE_CUDA_FLAGS
=
"-ccbin clang++-6.0 -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_60,code=sm_60 -Xptxas -v -gencode=arch=compute_70,code=sm_70"
\
${
MY_PROJECT_SOURCE
}
#-D CMAKE_CUDA_COMPILER="/package/install/cuda_10.0/bin/nvcc" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -Xptxas -v -maxrregcount=128" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -gencode=arch=compute_70,code=sm_70" \
#-D CMAKE_CUDA_FLAGS="-ccbin clang++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -gencode=arch=compute_61,code=sm_61 -Xptxas -v -gencode=arch=compute_70,code=sm_70 -Xptxas -v -maxrregcount=128" \
script/extract_asm-cuda.sh
View file @
dab29389
cuobjdump
-xelf
all ./driver/driver
&&
nvdisasm
--print-code
-g
driver.sm_61.cubin
>
driver.sm_61.asm
&&
nvdisasm
--print-code
-g
driver.sm_70.cubin
>
driver.sm_70.asm
cuobjdump
-xelf
sm_60 ./driver/driver
&&
nvdisasm
--print-code
-g
driver.sm_60.cubin
>
driver.sm_60.asm
cuobjdump
-xelf
sm_61 ./driver/driver
&&
nvdisasm
--print-code
-g
driver.sm_61.cubin
>
driver.sm_61.asm
cuobjdump
-xelf
sm_70 ./driver/driver
&&
nvdisasm
--print-code
-g
driver.sm_70.cubin
>
driver.sm_70.asm
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment