yangql / composable_kernel-1 · commit 5d2cafcb

Commit 5d2cafcb, authored Nov 07, 2018 by Chao Liu
clean up
parent eace3255
Showing 4 changed files with 334 additions and 1037 deletions (+334 −1037)
cmake.sh                                      +0    -22
driver/conv.cu                                +18   -29
src/include/constant_direct_convolution.cuh   +0    -602
src/include/direct_convolution.cuh            +316  -384
cmake.sh  (deleted, file mode 100755 → 0)
#!/bin/bash
rm -f CMakeCache.txt
rm -f *.cmake
rm -rf CMakeFiles

MY_PROJECT_SOURCE=/package/code/github/test_feature/SpMV
MY_PROJECT_INSTALL=../install.dir

cmake                                                                               \
-D CMAKE_INSTALL_PREFIX=${MY_PROJECT_INSTALL}                                       \
-D CMAKE_CXX_FLAGS="${CMAKE_CXX_FLAGS} -std=c++11"                                  \
-D CMAKE_BUILD_TYPE=Release                                                         \
-D CMAKE_VERBOSE_MAKEFILE:BOOL=ON                                                   \
-D BOOST_ROOT="/package/install/boost_1.66.0-mpich_3.2"                             \
-D CMAKE_CUDA_COMPILER="/package/install/cuda_9.0/bin/nvcc"                         \
-D CUDA_COMMON_INCLUDE_DIR="/package/code/github/test_feature/cuda_9.0_common/inc"  \
-D CMAKE_CUDA_FLAGS="-ccbin g++ -m64 -Xcompiler -fopenmp -lineinfo --source-in-ptx -keep -Xptxas -v -arch=sm_35 -Xptxas -v -maxrregcount=40" \
${MY_PROJECT_SOURCE}

#-D CMAKE_CUDA_FLAGS="-lineinfo --source-in-ptx -keep -Xptxas -v -arch=sm_35 -Xptxas -v -maxrregcount=32" \
#-D CMAKE_CUDA_FLAGS="-G -lineinfo --source-in-ptx -keep -Xptxas -v -arch=sm_35" \
driver/conv.cu

@@ -5,13 +5,7 @@
 #include "nvToolsExt.h"
 #include "tensor.hpp"
 #include "constant_tensor_descriptor.cuh"
-#include "device_tensor_descriptor.cuh"
-
-#if 0
 #include "direct_convolution.cuh"
-#else
-#include "constant_direct_convolution.cuh"
-#endif
 
 template <class T>
 struct GeneratorConstant
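For orientation: GeneratorConstant, whose declaration ends the context above, is the functor main() later uses to zero-fill out_host. Its body lies outside the visible hunks, so the following is only a sketch under the assumption that Tensor::GenerateTensorValue invokes the functor once per element and that a constant fill ignores the element index; it is not the repository's actual definition.

// Sketch only: hypothetical stand-in for GeneratorConstant in driver/conv.cu.
template <class T>
struct GeneratorConstant
{
    T mValue = 0; // value broadcast to every element, e.g. GeneratorConstant<float>{0}

    // Called once per tensor element; the multi-dimensional index is ignored
    // because every element receives the same constant.
    template <class... Is>
    T operator()(Is...) const
    {
        return mValue;
    }
};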
@@ -116,7 +110,7 @@ void host_convolution(const Tensor<T>& in, const Tensor<T>& wei, Tensor<T>& out)
 }
 
 template <class T, class InDesc, class WeiDesc, class OutDesc>
-void const_device_convolution(
+void device_convolution(
     InDesc, const Tensor<T>& in, WeiDesc, const Tensor<T>& wei, OutDesc, Tensor<T>& out)
 {
     std::size_t data_sz = sizeof(T);
@@ -126,10 +120,6 @@ void const_device_convolution(
     int num_thread = std::thread::hardware_concurrency();
 
-#if 0
-    out.GenerateTensorValue(GeneratorConstant<float>{0}, num_thread);
-#endif
-
     in_device_buf.ToDevice(in.mData.data());
     wei_device_buf.ToDevice(wei.mData.data());
     out_device_buf.ToDevice(out.mData.data());
@@ -147,13 +137,13 @@ void const_device_convolution(
     constexpr unsigned CPerBlockLoop = 1;
 
     constexpr unsigned OutTileSizeH = 2;
     constexpr unsigned OutTileSizeW = 2;
-    constexpr unsigned YPerBlock = 16;
-    constexpr unsigned XPerBlock = 16;
+    constexpr unsigned YPerBlock = 4;
+    constexpr unsigned XPerBlock = 8;
 
     constexpr unsigned NBlockCopyLen0 = 1;
     constexpr unsigned NBlockCopyLen1 = 1;
-    constexpr unsigned NBlockCopyLen2 = 1;
-    constexpr unsigned NBlockCopyLen3 = 64;
+    constexpr unsigned NBlockCopyLen2 = 2;
+    constexpr unsigned NBlockCopyLen3 = 16;
 
     constexpr unsigned nblock = (out_desc.GetLength(I0) / NPerBlock) *
                                 (out_desc.GetLength(I1) / KPerBlock) *
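The retuned tile constants above feed directly into the block count: the hunk shows the first two factors of nblock and the rest is cut off at the hunk boundary. The sketch below works that arithmetic through under the assumption that the remaining factors tile the output height and width as (YPerBlock * OutTileSizeH) and (XPerBlock * OutTileSizeW); NPerBlock, KPerBlock, and the tensor extents are hypothetical values, not taken from the diff.

// Standalone sketch of the grid-size arithmetic; compiles with any C++11 compiler.
#include <cstdio>

int main()
{
    // Tile constants from the new version of driver/conv.cu.
    constexpr unsigned OutTileSizeH = 2;
    constexpr unsigned OutTileSizeW = 2;
    constexpr unsigned YPerBlock    = 4;
    constexpr unsigned XPerBlock    = 8;

    // Assumed values; these are not visible in the hunk.
    constexpr unsigned NPerBlock = 1;
    constexpr unsigned KPerBlock = 1;
    constexpr unsigned N = 1, K = 64, Ho = 32, Wo = 32; // hypothetical output extents

    // Each block covers a (YPerBlock*OutTileSizeH) x (XPerBlock*OutTileSizeW)
    // output patch, i.e. 8 x 16 with the values above.
    constexpr unsigned nblock = (N / NPerBlock) * (K / KPerBlock) *
                                (Ho / (YPerBlock * OutTileSizeH)) *
                                (Wo / (XPerBlock * OutTileSizeW));

    std::printf("blocks per grid: %u\n", nblock); // 1 * 64 * 4 * 2 = 512
    return 0;
}

With the old constants (YPerBlock = 16, XPerBlock = 16) each block would have covered a 32 x 32 output patch, so the new values launch more, smaller blocks for the same output size.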
@@ -239,31 +229,23 @@ int main()
     Tensor<float> wei(make_TensorDescriptor(wei_desc));
     Tensor<float> out_host(make_TensorDescriptor(out_desc));
+    Tensor<float> out_device = out_host;
 
     int num_thread = std::thread::hardware_concurrency();
 
-#if 0
+#if 1
     in.GenerateTensorValue(GeneratorTensor<float>{}, num_thread);
     wei.GenerateTensorValue(GeneratorTensor<float>{}, num_thread);
+    out_host.GenerateTensorValue(GeneratorConstant<float>{0}, num_thread);
 #endif
 
-#if 0
-    Tensor<float> out_device = out_host;
-    host_convolution(in, wei, out_host);
-#endif
-
-    const_device_convolution(
-        in_desc, in, wei_desc, wei, out_desc, out_device);
+    device_convolution(
+        in_desc, in, wei_desc, wei, out_desc, out_device);
 
     std::cout << __func__ << ": done" << std::endl;
 
-#if 0
-    LogRange(std::cout << __func__ << "in : ", in.mData, ",") << std::endl;
-    LogRange(std::cout << __func__ << "wei: ", wei.mData, ",") << std::endl;
-    LogRange(std::cout, out_host.mData, ",") << std::endl;
-    LogRange(std::cout, out_device.mData, ",") << std::endl;
-#endif
-
-#if 0
+#if 1
+    host_convolution(in, wei, out_host);
+
     float error = 0;
     float max_diff = 0;
     float host_value = 0, device_value = 0;
@@ -282,4 +264,11 @@ int main()
     std::cout << "max_diff: " << max_diff << ", " << host_value << ", " << device_value
               << std::endl;
 #endif
+
+#if 0
+    LogRange(std::cout << __func__ << "in : ", in.mData, ",") << std::endl;
+    LogRange(std::cout << __func__ << "wei: ", wei.mData, ",") << std::endl;
+    LogRange(std::cout, out_host.mData, ",") << std::endl;
+    LogRange(std::cout, out_device.mData, ",") << std::endl;
+#endif
 }
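Net effect of the driver changes above: the GPU path is renamed from const_device_convolution to device_convolution, input generation is always on (#if 1), and the host-side reference convolution plus the error/max_diff comparison are switched back on. The comparison loop itself sits between the last two hunks and is not visible here, so the following is only a sketch using the variable names from the diff, not the repository's actual code.

// Hypothetical element-wise check between host and device results.
#include <cmath>
#include <cstdio>
#include <vector>

void check_result(const std::vector<float>& host, const std::vector<float>& device)
{
    float error    = 0;
    float max_diff = 0;
    float host_value = 0, device_value = 0;

    for (std::size_t i = 0; i < host.size(); ++i)
    {
        float diff = std::fabs(host[i] - device[i]);
        error += diff;
        if (diff > max_diff)
        {
            max_diff     = diff;
            host_value   = host[i];   // offending pair, reported alongside max_diff
            device_value = device[i];
        }
    }

    std::printf("error: %f\n", error);
    std::printf("max_diff: %f, %f, %f\n", max_diff, host_value, device_value);
}

int main()
{
    // Toy data with one mismatching element.
    check_result({1.0f, 2.0f, 3.0f}, {1.0f, 2.5f, 3.0f}); // max_diff: 0.5, 2.0, 2.5
    return 0;
}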
src/include/constant_direct_convolution.cuh  (deleted, file mode 100644 → 0)

Diff collapsed in the page view; contents not shown (602 lines removed).
src/include/direct_convolution.cuh

Diff collapsed in the page view; contents not shown (+316 −384).