Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
9657baec
"vscode:/vscode.git/clone" did not exist on "313f3c07d210330a1bd04ed0604503efb18fa22c"
Commit
9657baec
authored
Nov 02, 2018
by
Chao Liu
Browse files
initial direct conv correct run
parent
dfa02139
Changes
3
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
327 additions
and
94 deletions
+327
-94
driver/conv.cu
driver/conv.cu
+43
-21
src/include/device_tensor.cuh
src/include/device_tensor.cuh
+3
-4
src/include/direct_convolution.cuh
src/include/direct_convolution.cuh
+281
-69
No files found.
driver/conv.cu
View file @
9657baec
...
@@ -7,21 +7,35 @@
...
@@ -7,21 +7,35 @@
#include "direct_convolution.cuh"
#include "direct_convolution.cuh"
template
<
class
T
>
template
<
class
T
>
struct
Generator
struct
Generator
Constant
{
{
T
value
=
0
;
T
value
=
0
;
template
<
class
...
Is
>
template
<
class
...
Is
>
T
operator
()(
Is
...
is
)
T
operator
()(
Is
...
is
)
{
{
#if 0
return
value
;
return
value
;
#else
}
};
template
<
class
T
>
struct
GeneratorTensor
{
template
<
class
...
Is
>
T
operator
()(
Is
...
is
)
{
#if 0
std::initializer_list<std::size_t> ls = {static_cast<std::size_t>(is)...};
std::initializer_list<std::size_t> ls = {static_cast<std::size_t>(is)...};
return std::accumulate(ls.begin(), ls.end(), std::size_t(0));
return std::accumulate(ls.begin(), ls.end(), std::size_t(0));
#else
assert
(
sizeof
...(
Is
)
>
0
);
std
::
initializer_list
<
std
::
size_t
>
ids
=
{
static_cast
<
std
::
size_t
>
(
is
)...};
std
::
vector
<
std
::
size_t
>
lens
(
sizeof
...(
Is
),
100
);
std
::
vector
<
std
::
size_t
>
strides
(
sizeof
...(
Is
),
1
);
std
::
partial_sum
(
lens
.
rbegin
(),
lens
.
rbegin
()
+
(
sizeof
...(
Is
)
-
1
),
strides
.
rbegin
()
+
1
);
return
std
::
inner_product
(
ids
.
begin
(),
ids
.
end
(),
strides
.
begin
(),
std
::
size_t
(
0
))
+
1
;
#endif
#endif
}
}
};
};
template
<
typename
T
>
template
<
typename
T
>
...
@@ -57,18 +71,22 @@ void host_convolution(const Tensor<T>& in,
...
@@ -57,18 +71,22 @@ void host_convolution(const Tensor<T>& in,
}
}
template
<
class
T
>
template
<
class
T
>
void
device_convolution
(
Tensor
<
T
>&
in
,
Tensor
<
T
>&
wei
,
Tensor
<
T
>&
out
)
void
device_convolution
(
const
Tensor
<
T
>&
in
,
const
Tensor
<
T
>&
wei
,
Tensor
<
T
>&
out
)
{
{
DeviceTensorDescriptor
<
4
>
in_desc_device
(
in
.
mDesc
);
DeviceTensorDescriptor
<
4
>
in_desc_device
(
in
.
mDesc
);
DeviceTensorDescriptor
<
4
>
wei_desc_device
(
wei
.
mDesc
);
DeviceTensorDescriptor
<
4
>
wei_desc_device
(
wei
.
mDesc
);
DeviceTensorDescriptor
<
4
>
out_desc_device
(
out
.
mDesc
);
DeviceTensorDescriptor
<
4
>
out_desc_device
(
out
.
mDesc
);
printf
(
"__func__: in_desc_device: %u %u %u %u
\n
"
,
printf
(
"__func__: in_desc_device:
{
%u %u %u %u
}, {%u %u %u %u}
\n
"
,
in_desc_device
.
GetLength
(
0
),
in_desc_device
.
GetLength
(
0
),
in_desc_device
.
GetLength
(
1
),
in_desc_device
.
GetLength
(
1
),
in_desc_device
.
GetLength
(
2
),
in_desc_device
.
GetLength
(
2
),
in_desc_device
.
GetLength
(
3
));
in_desc_device
.
GetLength
(
3
),
in_desc_device
.
GetStride
(
0
),
in_desc_device
.
GetStride
(
1
),
in_desc_device
.
GetStride
(
2
),
in_desc_device
.
GetStride
(
3
));
std
::
size_t
data_sz
=
sizeof
(
T
);
std
::
size_t
data_sz
=
sizeof
(
T
);
DeviceMem
in_device_buf
(
data_sz
*
in
.
mDesc
.
GetElementSpace
());
DeviceMem
in_device_buf
(
data_sz
*
in
.
mDesc
.
GetElementSpace
());
...
@@ -77,11 +95,7 @@ void device_convolution(Tensor<T>& in, Tensor<T>& wei, Tensor<T>& out)
...
@@ -77,11 +95,7 @@ void device_convolution(Tensor<T>& in, Tensor<T>& wei, Tensor<T>& out)
int
num_thread
=
std
::
thread
::
hardware_concurrency
();
int
num_thread
=
std
::
thread
::
hardware_concurrency
();
#if 1
out
.
GenerateTensorValue
(
GeneratorConstant
<
float
>
{
0
},
num_thread
);
in
.
GenerateTensorValue
(
Generator
<
float
>
{
1
},
num_thread
);
wei
.
GenerateTensorValue
(
Generator
<
float
>
{
1
},
num_thread
);
#endif
out
.
GenerateTensorValue
(
Generator
<
float
>
{
0
},
num_thread
);
in_device_buf
.
ToDevice
(
in
.
mData
.
data
());
in_device_buf
.
ToDevice
(
in
.
mData
.
data
());
wei_device_buf
.
ToDevice
(
wei
.
mData
.
data
());
wei_device_buf
.
ToDevice
(
wei
.
mData
.
data
());
...
@@ -89,7 +103,7 @@ void device_convolution(Tensor<T>& in, Tensor<T>& wei, Tensor<T>& out)
...
@@ -89,7 +103,7 @@ void device_convolution(Tensor<T>& in, Tensor<T>& wei, Tensor<T>& out)
dim3
block_dim
(
64
,
1
,
1
);
dim3
block_dim
(
64
,
1
,
1
);
dim3
grid_dim
(
1
,
1
,
1
);
dim3
grid_dim
(
1
,
1
,
1
);
gridwise_convolution
<
T
,
3
,
3
,
4
,
4
,
2
,
2
,
1
,
1
,
32
,
32
,
1
>
gridwise_convolution
<
T
,
3
,
3
,
4
,
4
,
2
,
2
,
1
,
1
,
8
,
8
,
1
>
<<<
grid_dim
,
block_dim
>>>
(
in_desc_device
,
<<<
grid_dim
,
block_dim
>>>
(
in_desc_device
,
static_cast
<
T
*>
(
in_device_buf
.
GetDeviceBuffer
()),
static_cast
<
T
*>
(
in_device_buf
.
GetDeviceBuffer
()),
wei_desc_device
,
wei_desc_device
,
...
@@ -97,6 +111,7 @@ void device_convolution(Tensor<T>& in, Tensor<T>& wei, Tensor<T>& out)
...
@@ -97,6 +111,7 @@ void device_convolution(Tensor<T>& in, Tensor<T>& wei, Tensor<T>& out)
out_desc_device
,
out_desc_device
,
static_cast
<
T
*>
(
out_device_buf
.
GetDeviceBuffer
()));
static_cast
<
T
*>
(
out_device_buf
.
GetDeviceBuffer
()));
checkCudaErrors
(
cudaGetLastError
());
out_device_buf
.
FromDevice
(
out
.
mData
.
data
());
out_device_buf
.
FromDevice
(
out
.
mData
.
data
());
}
}
...
@@ -125,16 +140,23 @@ int main()
...
@@ -125,16 +140,23 @@ int main()
std
::
cout
<<
__func__
<<
": num_thread "
<<
num_thread
<<
std
::
endl
;
std
::
cout
<<
__func__
<<
": num_thread "
<<
num_thread
<<
std
::
endl
;
in
.
GenerateTensorValue
(
Generator
<
float
>
{
1
},
num_thread
);
in
.
GenerateTensorValue
(
Generator
Tensor
<
float
>
{},
num_thread
);
wei
.
GenerateTensorValue
(
Generator
<
float
>
{
1
},
num_thread
);
wei
.
GenerateTensorValue
(
Generator
Tensor
<
float
>
{},
num_thread
);
//
host_convolution(in, wei, out_host, num_thread);
host_convolution
(
in
,
wei
,
out_host
,
num_thread
);
device_convolution
(
in
,
wei
,
out_device
);
device_convolution
(
in
,
wei
,
out_device
);
std
::
cout
<<
__func__
<<
": done"
<<
std
::
endl
;
std
::
cout
<<
__func__
<<
": done"
<<
std
::
endl
;
LogRange
(
std
::
cout
,
in
.
mData
,
","
)
<<
std
::
endl
;
LogRange
(
std
::
cout
<<
__func__
<<
"in : "
,
in
.
mData
,
","
)
<<
std
::
endl
;
LogRange
(
std
::
cout
,
wei
.
mData
,
","
)
<<
std
::
endl
;
LogRange
(
std
::
cout
<<
__func__
<<
"wei: "
,
wei
.
mData
,
","
)
<<
std
::
endl
;
//
LogRange(std::cout, out_host.mData, ",") << std::endl;
LogRange
(
std
::
cout
,
out_host
.
mData
,
","
)
<<
std
::
endl
;
LogRange
(
std
::
cout
,
out_device
.
mData
,
","
)
<<
std
::
endl
;
LogRange
(
std
::
cout
,
out_device
.
mData
,
","
)
<<
std
::
endl
;
float
error
=
0
;
for
(
int
i
=
0
;
i
<
out_host
.
mData
.
size
();
++
i
)
{
error
+=
std
::
abs
(
out_host
.
mData
[
i
]
-
out_device
.
mData
[
i
]);
}
std
::
cout
<<
"error: "
<<
error
<<
std
::
endl
;
}
}
src/include/device_tensor.cuh
View file @
9657baec
...
@@ -17,15 +17,14 @@ struct DeviceTensorDescriptor
...
@@ -17,15 +17,14 @@ struct DeviceTensorDescriptor
__host__
__device__
unsigned
GetLength
(
unsigned
i
)
const
{
return
mpLengths
[
i
];
}
__host__
__device__
unsigned
GetLength
(
unsigned
i
)
const
{
return
mpLengths
[
i
];
}
__host__
__device__
unsigned
long
GetStride
(
unsigned
i
)
const
{
return
mpStrides
[
i
];
}
__host__
__device__
unsigned
GetStride
(
unsigned
i
)
const
{
return
mpStrides
[
i
];
}
// this is ugly
// this is ugly
__host__
__device__
unsigned
long
__host__
__device__
unsigned
Get1dIndex
(
unsigned
n
,
unsigned
c
,
unsigned
h
,
unsigned
w
)
const
Get1dIndex
(
unsigned
n
,
unsigned
c
,
unsigned
h
,
unsigned
w
)
const
{
{
return
n
*
mpStrides
[
0
]
+
c
*
mpStrides
[
1
]
+
h
*
mpStrides
[
2
]
+
w
*
mpStrides
[
3
];
return
n
*
mpStrides
[
0
]
+
c
*
mpStrides
[
1
]
+
h
*
mpStrides
[
2
]
+
w
*
mpStrides
[
3
];
}
}
unsigned
mpLengths
[
NDim
];
unsigned
mpLengths
[
NDim
];
unsigned
long
mpStrides
[
NDim
];
unsigned
mpStrides
[
NDim
];
};
};
src/include/direct_convolution.cuh
View file @
9657baec
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment