Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
0404f777
"...git@developer.sourcefind.cn:change/sglang.git" did not exist on "c77c1e05badb5f5bf774872c3498b21eeb0aef20"
Commit
0404f777
authored
Nov 13, 2018
by
Chao Liu
Browse files
refactor
parent
08c7f743
Changes
3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
17 additions
and
483 deletions
+17
-483
driver/conv.cu
driver/conv.cu
+4
-4
src/include/blockwise_tensor_op.cuh
src/include/blockwise_tensor_op.cuh
+13
-17
src/include/direct_convolution.cuh
src/include/direct_convolution.cuh
+0
-462
No files found.
driver/conv.cu
View file @
0404f777
...
...
@@ -5,7 +5,7 @@
#include "nvToolsExt.h"
#include "tensor.hpp"
#include "constant_tensor_descriptor.cuh"
#include "direct_convolution.cuh"
#include "direct_convolution
_2
.cuh"
template
<
class
T
>
struct
GeneratorConstant
...
...
@@ -133,8 +133,8 @@ void device_convolution(
constexpr
auto
wei_desc
=
WeiDesc
{};
constexpr
auto
out_desc
=
OutDesc
{};
constexpr
unsigned
NPerBlock
=
1
;
constexpr
unsigned
KPerBlock
=
1
;
constexpr
unsigned
CPerBlockLoop
=
1
;
constexpr
unsigned
KPerBlock
=
2
;
constexpr
unsigned
CPerBlockLoop
=
4
;
constexpr
unsigned
OutTileSizeH
=
2
;
constexpr
unsigned
OutTileSizeW
=
2
;
constexpr
unsigned
YPerBlock
=
8
;
...
...
@@ -213,7 +213,7 @@ int main()
constexpr
unsigned
C
=
256
;
constexpr
unsigned
HI
=
34
;
constexpr
unsigned
WI
=
34
;
constexpr
unsigned
K
=
5
6
;
constexpr
unsigned
K
=
6
4
;
constexpr
unsigned
S
=
3
;
constexpr
unsigned
R
=
3
;
#elif 0
...
...
src/include/blockwise_tensor_op.cuh
View file @
0404f777
...
...
@@ -62,13 +62,9 @@ __device__ void blockwise_4d_tensor_op(
{
for(unsigned did3 = did3_begin; did3 < src_desc.GetLength(I3); did3 += NWorkLen3)
{
const unsigned sindex =
src_desc.GetStride(I0) * did0 + src_desc.GetStride(I1) * did1 +
src_desc.GetStride(I2) * did2 + src_desc.GetStride(I3) * did3;
const unsigned sindex = src_desc.Get1dIndex(did0, did1, did2, did3);
const unsigned dindex =
dst_desc.GetStride(I0) * did0 + dst_desc.GetStride(I1) * did1 +
dst_desc.GetStride(I2) * did2 + dst_desc.GetStride(I3) * did3;
const unsigned dindex = dst_desc.Get1dIndex(did0, did1, did2, did3);
f(p_src[dindex], p_dst[sindex]);
...
...
@@ -115,6 +111,8 @@ __device__ void blockwise_4d_tensor_op(
static_assert
(
is_same
<
decltype
(
src_desc
.
GetLengths
()),
decltype
(
dst_desc
.
GetLengths
())
>::
value
);
constexpr
auto
desc
=
make_ConstantTensorDescriptor
(
src_desc
.
GetLengths
());
#if 0
if(threadIdx.x == 0)
{
...
...
@@ -125,29 +123,27 @@ __device__ void blockwise_4d_tensor_op(
unsigned
lid
=
threadIdx
.
x
;
for
(
unsigned
i
=
lid
;
i
<
src_
desc
.
GetElementSize
();
i
+=
BlockSize
)
for
(
unsigned
i
=
lid
;
i
<
desc
.
GetElementSize
();
i
+=
BlockSize
)
{
unsigned
is
=
i
;
const
unsigned
did0
=
is
/
src_
desc
.
GetStride
(
I0
);
const
unsigned
did0
=
is
/
desc
.
GetStride
(
I0
);
is
-=
did0
*
src_
desc
.
GetStride
(
I0
);
is
-=
did0
*
desc
.
GetStride
(
I0
);
const
unsigned
did1
=
is
/
src_
desc
.
GetStride
(
I1
);
const
unsigned
did1
=
is
/
desc
.
GetStride
(
I1
);
is
-=
did1
*
src_
desc
.
GetStride
(
I1
);
is
-=
did1
*
desc
.
GetStride
(
I1
);
const
unsigned
did2
=
is
/
src_
desc
.
GetStride
(
I2
);
const
unsigned
did2
=
is
/
desc
.
GetStride
(
I2
);
is
-=
did2
*
src_
desc
.
GetStride
(
I2
);
is
-=
did2
*
desc
.
GetStride
(
I2
);
const
unsigned
did3
=
is
/
src_desc
.
GetStride
(
I3
);
const
unsigned
sindex
=
src_desc
.
GetStride
(
I0
)
*
did0
+
src_desc
.
GetStride
(
I1
)
*
did1
+
src_desc
.
GetStride
(
I2
)
*
did2
+
src_desc
.
GetStride
(
I3
)
*
did3
;
const
unsigned
sindex
=
src_desc
.
Get1dIndex
(
did0
,
did1
,
did2
,
did3
);
const
unsigned
dindex
=
dst_desc
.
GetStride
(
I0
)
*
did0
+
dst_desc
.
GetStride
(
I1
)
*
did1
+
dst_desc
.
GetStride
(
I2
)
*
did2
+
dst_desc
.
GetStride
(
I3
)
*
did3
;
const
unsigned
dindex
=
dst_desc
.
Get1dIndex
(
did0
,
did1
,
did2
,
did3
);
f
(
p_src
[
sindex
],
p_dst
[
dindex
]);
}
...
...
src/include/direct_convolution.cuh
deleted
100644 → 0
View file @
08c7f743
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment