Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
126cae0c
Commit
126cae0c
authored
Sep 17, 2019
by
Chao Liu
Browse files
bug fix
parent
e1a67b69
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
34 additions
and
41 deletions
+34
-41
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_padded.hpp
..._convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_padded.hpp
+8
-0
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buffer.hpp
...cit_gemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buffer.hpp
+11
-7
composable_kernel/include/tensor_description/tensor_coordinate.hpp
...e_kernel/include/tensor_description/tensor_coordinate.hpp
+2
-2
composable_kernel/include/tensor_description/tensor_coordinate_v2.hpp
...ernel/include/tensor_description/tensor_coordinate_v2.hpp
+2
-2
composable_kernel/include/tensor_description/tensor_descriptor.hpp
...e_kernel/include/tensor_description/tensor_descriptor.hpp
+0
-19
driver/src/driver.cpp
driver/src/driver.cpp
+11
-11
No files found.
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_padded.hpp
View file @
126cae0c
...
@@ -185,6 +185,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded
...
@@ -185,6 +185,7 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded
InBlockCopyDstDataPerWrite_N2
>
(
InBlockCopyDstDataPerWrite_N2
>
(
{
0
,
0
,
b_block_data_on_global
,
0
},
{
0
,
0
,
0
,
0
});
{
0
,
0
,
b_block_data_on_global
,
0
},
{
0
,
0
,
0
,
0
});
#if 0
// weight tensor
// weight tensor
// tensor descriptor in device memory, src of blockwise copy
// tensor descriptor in device memory, src of blockwise copy
constexpr auto wei_e_k_global_desc =
constexpr auto wei_e_k_global_desc =
...
@@ -192,6 +193,13 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded
...
@@ -192,6 +193,13 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded
make_tuple(Merge<Sequence<C, Y, X>>{}, PassThrough<K>{}),
make_tuple(Merge<Sequence<C, Y, X>>{}, PassThrough<K>{}),
make_tuple(Sequence<1, 2, 3>{}, Sequence<0>{}),
make_tuple(Sequence<1, 2, 3>{}, Sequence<0>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}));
make_tuple(Sequence<0>{}, Sequence<1>{}));
#else
// hack
constexpr
auto
wei_e_k_global_desc_old
=
WeiGlobalDesc
::
Unfold
(
I1
,
I3
).
ReorderGivenNew2Old
(
Sequence
<
1
,
0
>
{});
constexpr
auto
wei_e_k_global_desc
=
make_native_tensor_descriptor
(
wei_e_k_global_desc_old
.
GetLengths
(),
wei_e_k_global_desc_old
.
GetStrides
());
#endif
// tensor descriptor in LDS, dst of blockwise copy
// tensor descriptor in LDS, dst of blockwise copy
// be careful of LDS alignment
// be careful of LDS alignment
...
...
composable_kernel/include/kernel_algorithm/gridwise_convolution_implicit_gemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buffer.hpp
View file @
126cae0c
...
@@ -184,23 +184,27 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buf
...
@@ -184,23 +184,27 @@ struct GridwiseConvolutionImplicitGemm_v4r1_nchw_kcyx_nkhw_padded_lds_double_buf
InBlockCopyDstDataPerWrite_N2
>
(
InBlockCopyDstDataPerWrite_N2
>
(
{
0
,
0
,
b_block_data_on_global
,
0
},
{
0
,
0
,
0
,
0
});
{
0
,
0
,
b_block_data_on_global
,
0
},
{
0
,
0
,
0
,
0
});
#if 0
// weight tensor
// weight tensor
// tensor descriptor in device memory, src of blockwise copy
// tensor descriptor in device memory, src of blockwise copy
constexpr auto wei_e_k_global_desc =
constexpr auto wei_e_k_global_desc =
#if 0
transform_tensor_descriptor(wei_k_c_y_x_global_desc,
transform_tensor_descriptor(wei_k_c_y_x_global_desc,
make_tuple(Merge<Sequence<C, Y, X>>{}, PassThrough<K>{}),
make_tuple(Merge<Sequence<C, Y, X>>{}, PassThrough<K>{}),
make_tuple(Sequence<1, 2, 3>{}, Sequence<0>{}),
make_tuple(Sequence<1, 2, 3>{}, Sequence<0>{}),
make_tuple(Sequence<0>{}, Sequence<1>{}));
make_tuple(Sequence<0>{}, Sequence<1>{}));
#else
// hack
#else
// hack
make_native_tensor_descriptor_packed
(
Sequence
<
K
,
C
*
Y
*
X
>
{});
constexpr
auto
wei_e_k_global_desc_old
=
WeiGlobalDesc
::
Unfold
(
I1
,
I3
).
ReorderGivenNew2Old
(
Sequence
<
1
,
0
>
{});
constexpr
auto
wei_e_k_global_desc
=
make_native_tensor_descriptor
(
wei_e_k_global_desc_old
.
GetLengths
(),
wei_e_k_global_desc_old
.
GetStrides
());
#endif
#endif
// tensor descriptor in LDS, dst of blockwise copy
// tensor descriptor in LDS, dst of blockwise copy
// be careful of LDS alignment
// be careful of LDS alignment
constexpr
auto
wei_e_k_block_desc
=
make_native_tensor_descriptor_aligned
(
constexpr
auto
wei_e_k_block_desc
=
make_native_tensor_descriptor_aligned
(
Sequence
<
EPerBlock
,
KPerBlock
>
{},
Sequence
<
EPerBlock
,
KPerBlock
>
{},
Number
<
math
::
lcm
(
WeiBlockCopyDstDataPerWrite_K
,
GemmDataPerReadA
)
>
{});
Number
<
math
::
lcm
(
WeiBlockCopyDstDataPerWrite_K
,
GemmDataPerReadA
)
>
{});
// operator for blockwise copy of weight into LDS
// operator for blockwise copy of weight into LDS
// slice a tensor, and copy it into another tensor
// slice a tensor, and copy it into another tensor
...
...
composable_kernel/include/tensor_description/tensor_coordinate.hpp
View file @
126cae0c
...
@@ -313,14 +313,14 @@ struct TensorCoordinate
...
@@ -313,14 +313,14 @@ struct TensorCoordinate
private:
private:
template
<
class
...
Ts
>
template
<
class
...
Ts
>
__host__
__device__
static
constexpr
auto
__host__
__device__
static
constexpr
auto
MakeDummyTensorCoordinate
(
ConstantTensorDescriptor
<
Ts
...
>
)
MakeDummyTensorCoordinate
(
ConstantTensorDescriptor
<
Ts
...
>
)
{
{
return
NormalTensorCoordinate
<
ConstantTensorDescriptor
<
Ts
...
>>
();
return
NormalTensorCoordinate
<
ConstantTensorDescriptor
<
Ts
...
>>
();
}
}
template
<
class
...
Ts
>
template
<
class
...
Ts
>
__host__
__device__
static
constexpr
auto
__host__
__device__
static
constexpr
auto
MakeDummyTensorCoordinate
(
ConstantMergedTensorDescriptor
<
Ts
...
>
)
MakeDummyTensorCoordinate
(
ConstantMergedTensorDescriptor
<
Ts
...
>
)
{
{
return
MergedTensorCoordinate
<
ConstantMergedTensorDescriptor
<
Ts
...
>>
();
return
MergedTensorCoordinate
<
ConstantMergedTensorDescriptor
<
Ts
...
>>
();
}
}
...
...
composable_kernel/include/tensor_description/tensor_coordinate_v2.hpp
View file @
126cae0c
...
@@ -188,7 +188,7 @@ struct TensorCoordinate_v2
...
@@ -188,7 +188,7 @@ struct TensorCoordinate_v2
private:
private:
template
<
typename
...
Ts
>
template
<
typename
...
Ts
>
__host__
__device__
static
constexpr
auto
__host__
__device__
static
constexpr
auto
MakeDummyTensorCoordinate
(
NativeTensorDescriptor
<
Ts
...
>
)
MakeDummyTensorCoordinate
(
NativeTensorDescriptor
<
Ts
...
>
)
{
{
return
NativeTensorCoordinate
<
NativeTensorDescriptor
<
Ts
...
>>
(
return
NativeTensorCoordinate
<
NativeTensorDescriptor
<
Ts
...
>>
(
make_zero_array
<
index_t
,
TensorDesc
::
GetNumOfDimension
()
>
());
make_zero_array
<
index_t
,
TensorDesc
::
GetNumOfDimension
()
>
());
...
@@ -196,7 +196,7 @@ struct TensorCoordinate_v2
...
@@ -196,7 +196,7 @@ struct TensorCoordinate_v2
template
<
typename
...
Ts
>
template
<
typename
...
Ts
>
__host__
__device__
static
constexpr
auto
__host__
__device__
static
constexpr
auto
MakeDummyTensorCoordinate
(
TransformedTensorDescriptor
<
Ts
...
>
)
MakeDummyTensorCoordinate
(
TransformedTensorDescriptor
<
Ts
...
>
)
{
{
return
TransformedTensorCoordinate
<
TransformedTensorDescriptor
<
Ts
...
>>
(
return
TransformedTensorCoordinate
<
TransformedTensorDescriptor
<
Ts
...
>>
(
make_zero_array
<
index_t
,
TensorDesc
::
GetNumOfDimension
()
>
());
make_zero_array
<
index_t
,
TensorDesc
::
GetNumOfDimension
()
>
());
...
...
composable_kernel/include/tensor_description/tensor_descriptor.hpp
View file @
126cae0c
...
@@ -85,12 +85,6 @@ struct NativeTensorDescriptor
...
@@ -85,12 +85,6 @@ struct NativeTensorDescriptor
return
offset
;
return
offset
;
}
}
// TODO: remove this
__host__
__device__
static
constexpr
index_t
GetOffsetFromMultiIndex
(
const
Index
&
idx
)
{
return
CalculateOffset
(
idx
);
}
__host__
__device__
static
constexpr
index_t
CalculateOffsetDiff
(
const
Index
&
idx_diff
)
__host__
__device__
static
constexpr
index_t
CalculateOffsetDiff
(
const
Index
&
idx_diff
)
{
{
index_t
offset_diff
=
0
;
index_t
offset_diff
=
0
;
...
@@ -227,13 +221,6 @@ struct TransformedTensorDescriptor
...
@@ -227,13 +221,6 @@ struct TransformedTensorDescriptor
return
LowTensorDescriptor
{};
return
LowTensorDescriptor
{};
}
}
#if 0
__host__ __device__ static constexpr auto GetLowerLengths()
{
return GetLowerTensorDescriptor().GetLengths();
}
#endif
struct
lambda_GetUpperLengths
struct
lambda_GetUpperLengths
{
{
template
<
typename
Transform
>
template
<
typename
Transform
>
...
@@ -359,12 +346,6 @@ struct TransformedTensorDescriptor
...
@@ -359,12 +346,6 @@ struct TransformedTensorDescriptor
return
GetLowerTensorDescriptor
().
CalculateOffset
(
CalculateLowerIndex
(
idx_up
));
return
GetLowerTensorDescriptor
().
CalculateOffset
(
CalculateLowerIndex
(
idx_up
));
}
}
// TODO: remove this
__host__
__device__
static
constexpr
index_t
GetOffsetFromMultiIndex
(
const
UpperIndex
&
idx_up
)
{
return
CalculateOffset
(
idx_up
);
}
#if 0
#if 0
template <index_t IDim>
template <index_t IDim>
__host__ __device__ static constexpr bool IsLinearDimension(Number<IDim>)
__host__ __device__ static constexpr bool IsLinearDimension(Number<IDim>)
...
...
driver/src/driver.cpp
View file @
126cae0c
...
@@ -49,7 +49,7 @@ struct GeneratorTensor_3
...
@@ -49,7 +49,7 @@ struct GeneratorTensor_3
{
{
std
::
array
<
index_t
,
sizeof
...(
Is
)
>
dims
=
{{
static_cast
<
index_t
>
(
is
)...}};
std
::
array
<
index_t
,
sizeof
...(
Is
)
>
dims
=
{{
static_cast
<
index_t
>
(
is
)...}};
auto
f_acc
=
[](
auto
a
,
auto
b
)
{
return
10
0
*
a
+
b
;
};
auto
f_acc
=
[](
auto
a
,
auto
b
)
{
return
10
*
a
+
b
;
};
return
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
index_t
(
0
),
f_acc
);
return
std
::
accumulate
(
dims
.
begin
(),
dims
.
end
(),
index_t
(
0
),
f_acc
);
}
}
...
@@ -75,19 +75,19 @@ int main(int argc, char* argv[])
...
@@ -75,19 +75,19 @@ int main(int argc, char* argv[])
using
namespace
ck
;
using
namespace
ck
;
#if 0
#if 0
constexpr index_t N =
256
;
constexpr index_t N =
8
;
constexpr index_t C =
64
;
constexpr index_t C =
8
;
constexpr index_t HI =
17
;
constexpr index_t HI =
2
;
constexpr index_t WI =
17
;
constexpr index_t WI =
8
;
constexpr index_t K =
256
;
constexpr index_t K =
128
;
constexpr index_t Y = 1
7
;
constexpr index_t Y = 1;
constexpr index_t X = 1
7
;
constexpr index_t X = 1;
using ConvStrides = Sequence<1, 1>;
using ConvStrides = Sequence<1, 1>;
using ConvDilations = Sequence<1, 1>;
using ConvDilations = Sequence<1, 1>;
using LeftPads = Sequence<0,
3
>;
using LeftPads = Sequence<0,
0
>;
using RightPads = Sequence<0,
3
>;
using RightPads = Sequence<0,
0
>;
#elif
0
#elif
0
// 3x3, 34x34
// 3x3, 34x34
constexpr
index_t
N
=
64
;
constexpr
index_t
N
=
64
;
...
@@ -347,7 +347,7 @@ int main(int argc, char* argv[])
...
@@ -347,7 +347,7 @@ int main(int argc, char* argv[])
wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
wei_kcyx.GenerateTensorValue(GeneratorTensor_1{}, num_thread);
#elif
0
#elif
0
in_nchw
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
in_nchw
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
wei_kcyx
.
GenerateTensorValue
(
GeneratorTensor_
2
{
-
5
,
5
},
num_thread
);
wei_kcyx
.
GenerateTensorValue
(
GeneratorTensor_
3
{
},
num_thread
);
#elif 0
#elif 0
in_nchw
.
GenerateTensorValue
(
GeneratorTensor_3
{},
num_thread
);
in_nchw
.
GenerateTensorValue
(
GeneratorTensor_3
{},
num_thread
);
wei_kcyx
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
wei_kcyx
.
GenerateTensorValue
(
GeneratorTensor_1
{},
num_thread
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment