gaoqiong / composable_kernel / Commits

Commit 5514dc0b, authored Aug 18, 2020 by Chao Liu
parent 0b7fcca6

prototype dynamic tensor descriptor

Showing 15 changed files with 171 additions and 330 deletions:
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform.hpp           +79  -58
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp   +17  -12
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp       +11  -6
composable_kernel/include/utility/array.hpp                                      +1   -1
composable_kernel/include/utility/array_helper.hpp                               +1   -1
composable_kernel/include/utility/common_header.hpp                              +10  -10
composable_kernel/include/utility/functional.hpp                                 +8   -5
composable_kernel/include/utility/functional3.hpp                                +9   -5
composable_kernel/include/utility/math.hpp                                       +1   -1
composable_kernel/include/utility/print_array.hpp                                +0   -177
composable_kernel/include/utility/print_sequence.hpp                             +0   -46
composable_kernel/include/utility/tuple.hpp                                      +1   -1
composable_kernel/include/utility/utility.hpp                                    +1   -1
driver/include/device_dummy_dynamic_transform.hpp                                +27  -0
driver/src/conv_driver.cpp                                                       +5   -6
composable_kernel/include/kernel_algorithm/dummy_dynamic_transform.hpp

@@ -8,6 +8,76 @@
 #include "dynamic_tensor_descriptor_helper.hpp"

 namespace ck {

+__host__ __device__ constexpr auto
+map_convolution_into_gemm(const DynamicNativeTensorDescriptor<4> wei_k_c_y_x_global_desc,
+                          const DynamicNativeTensorDescriptor<4> in_n_c_hi_wi_global_desc,
+                          const DynamicNativeTensorDescriptor<4> out_n_k_ho_wo_global_desc,
+                          const Array<index_t, 2> conv_strides,
+                          const Array<index_t, 2> conv_dilations,
+                          const Array<index_t, 2> in_left_pads,
+                          const Array<index_t, 2> in_right_pads)
+{
+    const index_t N = in_n_c_hi_wi_global_desc.GetLength(0);
+    const index_t C = in_n_c_hi_wi_global_desc.GetLength(1);
+    const index_t K = out_n_k_ho_wo_global_desc.GetLength(1);
+
+    const index_t Y = wei_k_c_y_x_global_desc.GetLength(2);
+    const index_t X = wei_k_c_y_x_global_desc.GetLength(3);
+
+    const index_t Hi = in_n_c_hi_wi_global_desc.GetLength(2);
+    const index_t Wi = in_n_c_hi_wi_global_desc.GetLength(3);
+
+    const index_t Ho = out_n_k_ho_wo_global_desc.GetLength(2);
+    const index_t Wo = out_n_k_ho_wo_global_desc.GetLength(3);
+
+    const index_t ConvStrideH = conv_strides[0];
+    const index_t ConvStrideW = conv_strides[1];
+
+    const index_t ConvDilationH = conv_dilations[0];
+    const index_t ConvDilationW = conv_dilations[1];
+
+    const index_t InLeftPadH = in_left_pads[0];
+    const index_t InLeftPadW = in_left_pads[1];
+
+    const index_t InRightPadH = in_right_pads[0];
+    const index_t InRightPadW = in_right_pads[1];
+
+    // input tensor
+    const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
+        transform_dynamic_tensor_descriptor(
+            in_n_c_hi_wi_global_desc,
+            make_tuple(DynamicPassThrough{N},
+                       DynamicPassThrough{C},
+                       DynamicLeftPad{Hi, InLeftPadH},
+                       DynamicLeftPad{Wi, InLeftPadW}),
+            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
+            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{})),
+        make_tuple(DynamicPassThrough{N},
+                   DynamicPassThrough{C},
+                   DynamicRightPad{Hi + InLeftPadH, InRightPadH},
+                   DynamicRightPad{Wi + InLeftPadW, InRightPadW}),
+        make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
+        make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
+
+    const index_t Hip = in_n_c_hip_wip_global_desc.GetLength(2);
+    const index_t Wip = in_n_c_hip_wip_global_desc.GetLength(3);
+
+    const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor(
+        in_n_c_hip_wip_global_desc,
+        make_tuple(DynamicPassThrough{N},
+                   DynamicPassThrough{C},
+                   DynamicEmbed<2>{{Y, Ho}, {ConvDilationH, ConvStrideH, 0}},
+                   DynamicEmbed<2>{{X, Wo}, {ConvDilationW, ConvStrideW, 0}}),
+        make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
+        make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));
+
+    const auto in_gemmk_gemmn_global_desc = transform_dynamic_tensor_descriptor(
+        in_n_c_y_ho_x_wo_global_desc,
+        make_tuple(DynamicMerge<3>{{C, Y, X}}, DynamicMerge<3>{{N, Ho, Wo}}),
+        make_tuple(Sequence<1, 2, 4>{}, Sequence<0, 3, 5>{}),
+        make_tuple(Sequence<0>{}, Sequence<1>{}));
+
+    return make_tuple(in_gemmk_gemmn_global_desc);
+}
+
 template <index_t BlockSize>
 struct DummyDynamicTransform

@@ -462,64 +532,15 @@ struct DummyDynamicTransform
                            const Array<index_t, 2> in_left_pads,
                            const Array<index_t, 2> in_right_pads) const
     {
-        const index_t N = in_n_c_hi_wi_global_desc.GetLength(0);
-        const index_t C = in_n_c_hi_wi_global_desc.GetLength(1);
-        const index_t K = out_n_k_ho_wo_global_desc.GetLength(1);
-
-        const index_t Y = wei_k_c_y_x_global_desc.GetLength(2);
-        const index_t X = wei_k_c_y_x_global_desc.GetLength(3);
-
-        const index_t Hi = in_n_c_hi_wi_global_desc.GetLength(2);
-        const index_t Wi = in_n_c_hi_wi_global_desc.GetLength(3);
-
-        const index_t Ho = out_n_k_ho_wo_global_desc.GetLength(2);
-        const index_t Wo = out_n_k_ho_wo_global_desc.GetLength(3);
-
-        const index_t ConvStrideH = conv_strides[0];
-        const index_t ConvStrideW = conv_strides[1];
-
-        const index_t ConvDilationH = conv_dilations[0];
-        const index_t ConvDilationW = conv_dilations[1];
-
-        const index_t InLeftPadH = in_left_pads[0];
-        const index_t InLeftPadW = in_left_pads[1];
-
-        const index_t InRightPadH = in_right_pads[0];
-        const index_t InRightPadW = in_right_pads[1];
-
-        // input tensor
-        const auto in_n_c_hip_wip_global_desc = transform_dynamic_tensor_descriptor(
-            transform_dynamic_tensor_descriptor(
-                in_n_c_hi_wi_global_desc,
-                make_tuple(DynamicPassThrough{N},
-                           DynamicPassThrough{C},
-                           DynamicLeftPad{Hi, InLeftPadH},
-                           DynamicLeftPad{Wi, InLeftPadW}),
-                make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
-                make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{})),
-            make_tuple(DynamicPassThrough{N},
-                       DynamicPassThrough{C},
-                       DynamicRightPad{Hi + InLeftPadH, InRightPadH},
-                       DynamicRightPad{Wi + InLeftPadW, InRightPadW}),
-            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
-            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}));
-
-        const index_t Hip = in_n_c_hip_wip_global_desc.GetLength(2);
-        const index_t Wip = in_n_c_hip_wip_global_desc.GetLength(3);
-
-        const auto in_n_c_y_ho_x_wo_global_desc = transform_dynamic_tensor_descriptor(
-            in_n_c_hip_wip_global_desc,
-            make_tuple(DynamicPassThrough{N},
-                       DynamicPassThrough{C},
-                       DynamicEmbed<2>{{Y, Ho}, {ConvDilationH, ConvStrideH, 0}},
-                       DynamicEmbed<2>{{X, Wo}, {ConvDilationW, ConvStrideW, 0}}),
-            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2>{}, Sequence<3>{}),
-            make_tuple(Sequence<0>{}, Sequence<1>{}, Sequence<2, 3>{}, Sequence<4, 5>{}));
-
-        const auto in_gemmk_gemmn_global_desc = transform_dynamic_tensor_descriptor(
-            in_n_c_y_ho_x_wo_global_desc,
-            make_tuple(DynamicMerge<3>{{C, Y, X}}, DynamicMerge<3>{{N, Ho, Wo}}),
-            make_tuple(Sequence<1, 2, 4>{}, Sequence<0, 3, 5>{}),
-            make_tuple(Sequence<0>{}, Sequence<1>{}));
+        const auto transformed_tensor_descs = map_convolution_into_gemm(wei_k_c_y_x_global_desc,
+                                                                        in_n_c_hi_wi_global_desc,
+                                                                        out_n_k_ho_wo_global_desc,
+                                                                        conv_strides,
+                                                                        conv_dilations,
+                                                                        in_left_pads,
+                                                                        in_right_pads);
+
+        const auto in_gemmk_gemmn_global_desc = transformed_tensor_descs.At(Number<0>{});

 #pragma unroll 1
         for(index_t iter = 0; iter < 100; ++iter)
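As an editorial aside (not part of the commit), the transform chain above is the usual implicit-GEMM view of forward convolution: GemmK = C * Y * X and GemmN = N * Ho * Wo, with Pad/Embed/Merge doing the index bookkeeping. Below is a minimal standalone sketch of that index arithmetic; the sizes and the coordinate values are illustrative assumptions, not values taken from this commit.

// Illustrative sketch only: decode a (gemmk, gemmn) coordinate of the implicit-GEMM
// input matrix back to an (n, c, hi, wi) coordinate, mirroring what the
// Merge/Embed/Pad transforms above compute. Plain C++, independent of the ck headers.
#include <cstdio>

int main()
{
    // assumed example convolution sizes (not taken from the commit)
    const int N = 1, C = 8, Hi = 6, Wi = 6;
    const int Y = 3, X = 3, Ho = 6, Wo = 6;
    const int ConvStrideH = 1, ConvStrideW = 1;
    const int ConvDilationH = 1, ConvDilationW = 1;
    const int InLeftPadH = 1, InLeftPadW = 1;

    const int GemmK = C * Y * X;   // merged {C, Y, X}
    const int GemmN = N * Ho * Wo; // merged {N, Ho, Wo}

    const int gemmk = 37, gemmn = 20; // arbitrary coordinate inside [GemmK) x [GemmN)

    // DynamicMerge: row-major decomposition of the merged coordinates
    const int c = gemmk / (Y * X), y = (gemmk / X) % Y, x = gemmk % X;
    const int n = gemmn / (Ho * Wo), ho = (gemmn / Wo) % Ho, wo = gemmn % Wo;

    // DynamicEmbed: position in the padded input, hip = y*dilation + ho*stride
    const int hip = y * ConvDilationH + ho * ConvStrideH;
    const int wip = x * ConvDilationW + wo * ConvStrideW;

    // DynamicLeftPad: remove the left padding to get the unpadded input coordinate
    const int hi = hip - InLeftPadH;
    const int wi = wip - InLeftPadW;

    std::printf("GemmKxGemmN = %dx%d; (%d,%d) -> n=%d c=%d hi=%d wi=%d (valid range [0,%d)x[0,%d))\n",
                GemmK, GemmN, gemmk, gemmn, n, c, hi, wi, Hi, Wi);
    return 0;
}

The division/remainder steps mirror DynamicMerge, the dilation/stride products mirror DynamicEmbed, and subtracting the left pad mirrors the DynamicLeftPad/DynamicRightPad handling.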
composable_kernel/include/tensor_description/dynamic_multi_index_transform.hpp

@@ -186,7 +186,8 @@ struct DynamicRightPad
     }
 };

-// idx_low = coefficients[0, ...nDimUp-1] * idx_up[0, ...nDimUp-1] + coefficients[nDimUp]
+// idx_low = coefficients[0, ...nDimUp-1] * idx_up[0, ...nDimUp-1] +
+// coefficients[nDimUp]
 template <index_t NDimUp>
 struct DynamicEmbed
 {

@@ -277,7 +278,7 @@ struct DynamicMerge
     __host__ __device__ explicit constexpr DynamicMerge(const LowerIndex& low_lengths)
         : low_lengths_{low_lengths},
-          low_lengths_scan_{reverse_inclusive_scan_on_array(
+          low_lengths_scan_{reverse_exclusive_scan_on_array(
               low_lengths, math::multiplies<index_t>{}, index_t{1})},
           up_length_{reduce_on_array(low_lengths, math::multiplies<index_t>(), 1)}
     {

@@ -303,16 +304,16 @@ struct DynamicMerge
         static_assert(LowIdx::Size() == NDimLow && UpIdx::Size() == 1,
                       "wrong! inconsistent # of dimension");

-        index_t itmp = idx_up[0];
+        index_t tmp = idx_up[0];

 #pragma unroll
-        for(index_t i; i < NDimLow - 1; ++i)
+        for(index_t i = 0; i < NDimLow - 1; ++i)
         {
-            idx_low(i) = itmp / low_lengths_scan_[i];
-            itmp -= idx_low[i] * low_lengths_scan_[i];
+            idx_low(i) = tmp / low_lengths_scan_[i];
+            tmp -= idx_low[i] * low_lengths_scan_[i];
         }

-        idx_low(NDimLow - 1) = itmp;
+        idx_low(NDimLow - 1) = tmp;
     }

     // idx_low_diff depends on idx_low_old, so idx_low need to be up-to-date

@@ -334,10 +335,14 @@ struct DynamicMerge
         // 1) If idx_up_diff is known at compile-time, then idx_low_diff_const
         // can be calculated at compile-time.
         // 2) If idx_up_diff is not known at compile-time, but its value
-        // doesn't change during the whole kernel execution, then idx_low_diff_const also
-        // doesn't change during the whole kernel execution. Compiler generated ISA should
-        // only caclculate idx_low_diff_const once and save it durinng the whole kernel execution
-        // If neither 1) nor 2) is satisfied, then the calculation will also be computed at
+        // doesn't change during the whole kernel execution, then
+        // idx_low_diff_const also
+        // doesn't change during the whole kernel execution. Compiler generated
+        // ISA should
+        // only caclculate idx_low_diff_const once and save it durinng the whole
+        // kernel execution
+        // If neither 1) nor 2) is satisfied, then the calculation will also be
+        // computed at
         // run-time each time this function is called, and can be very expensive.
         LowerIndex idx_low_diff_const = CalculateLowerIndex(idx_up_diff);

@@ -346,7 +351,7 @@ struct DynamicMerge
         index_t carry = 0;

 #pragma unroll
-        for(index_t i = NDimLow - 1; i > 1; --i)
+        for(index_t i = NDimLow - 1; i > 0; --i)
         {
             // this should be saved as well
             index_t idx_low_length_minus_idx_low_diff_const =
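For reference, a standalone sketch of the decode that the fixed CalculateLowerIndex performs for a 3-way merge; the scan values come from a reverse exclusive scan with multiplication over the lower lengths. This is an illustration only, not the ck implementation, and the lengths {8, 3, 3} are an assumed example.

// Standalone illustration of the fixed CalculateLowerIndex logic for a 3-way merge.
// low_lengths_scan holds the reverse exclusive scan (products of trailing lengths).
#include <array>
#include <cstdio>

int main()
{
    const std::array<int, 3> low_lengths = {8, 3, 3}; // e.g. {C, Y, X}

    // reverse exclusive scan with multiplies, init 1: scan[i] = product of low_lengths[i+1..]
    std::array<int, 3> low_lengths_scan{};
    int r = 1;
    for(int i = 2; i >= 0; --i)
    {
        low_lengths_scan[i] = r;
        r *= low_lengths[i];
    }
    // low_lengths_scan is now {9, 3, 1}

    int tmp = 37; // merged (upper) index
    std::array<int, 3> idx_low{};

    for(int i = 0; i < 3 - 1; ++i) // the commit fixes "index_t i;" -> "index_t i = 0;"
    {
        idx_low[i] = tmp / low_lengths_scan[i];
        tmp -= idx_low[i] * low_lengths_scan[i];
    }
    idx_low[3 - 1] = tmp;

    std::printf("37 -> {%d, %d, %d}\n", idx_low[0], idx_low[1], idx_low[2]); // 37 -> {4, 0, 1}
    return 0;
}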
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp

@@ -168,10 +168,13 @@ struct DynamicTransformedTensorDescriptor
                       is_valid_sequence_map<sorted_low_dimension_ids>{},
                       "wrong! LowDimensionIds is not configured correctly");

-        // TODO: sanity check: while a up-dimension could be associated with multille
-        // transformation, a low-dimension should be associated with only one transformation
+        // TODO: sanity check: while a up-dimension could be associated with
+        // multille
+        // transformation, a low-dimension should be associated with only one
+        // transformation

-        // TODO: sanity-check: GetLowerLengths of each transform should be consistent with lengths
+        // TODO: sanity-check: GetLowerLengths of each transform should be
+        // consistent with lengths
         // of lower-tensor-descriptor
     }

@@ -222,14 +225,16 @@ struct DynamicTransformedTensorDescriptor
         return low_tensor_desc_.GetElementSpace();
     }

     __host__ __device__ constexpr auto GetLowerTensorDescriptor() const { return low_tensor_desc_; }

     template <typename LowIdx, typename UpIdx>
     __host__ __device__ void CalculateLowerIndex(LowIdx& idx_low, const UpIdx& idx_up) const
     {
         static_for<0, NTransform, 1>{}([&](auto itran) constexpr {
-            auto tran = transforms_.At(itran);
+            const auto tran = transforms_.At(itran);

-            auto idx_up_part  = pick_array_element(idx_up, UpDimensionIds{}.At(itran));
-            auto idx_low_part = pick_array_element(idx_low, LowDimensionIds{}.At(itran));
+            const auto idx_up_part = pick_array_element(idx_up, UpDimensionIds{}.At(itran));
+            auto idx_low_part      = pick_array_element(idx_low, LowDimensionIds{}.At(itran));

             tran.CalculateLowerIndex(idx_low_part, idx_up_part);
         });
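A hedged sketch of the dispatch pattern used by CalculateLowerIndex above: each transform only reads the upper dimensions listed in its UpDimensionIds entry and only writes the lower dimensions listed in its LowDimensionIds entry. The fixed-size helper below is hypothetical and reuses the GemmK/GemmN merge case from the first file as its example; it is not the ck code itself.

// Sketch of per-transform dispatch: two merge transforms, each decoding one upper
// dimension and scattering the result into its own subset of the 6 lower dimensions.
#include <array>
#include <cstdio>

// decode one merged index into its components (same math as DynamicMerge) and write
// them to the lower-dimension slots given by low_dim_ids
static void merge_decode(int up,
                         const std::array<int, 3>& lengths,
                         std::array<int, 6>& low,
                         const std::array<int, 3>& low_dim_ids)
{
    const int strides[3] = {lengths[1] * lengths[2], lengths[2], 1};
    for(int i = 0; i < 3; ++i)
    {
        low[low_dim_ids[i]] = up / strides[i];
        up -= low[low_dim_ids[i]] * strides[i];
    }
}

int main()
{
    const std::array<int, 3> kcyx  = {8, 3, 3}; // {C, Y, X}
    const std::array<int, 3> nhowo = {1, 6, 6}; // {N, Ho, Wo}

    std::array<int, 6> idx_low{}; // lower index over dims {n, c, y, ho, x, wo}

    // transform 0: upper dim 0 (GemmK) -> lower dims {1, 2, 4}
    merge_decode(37, kcyx, idx_low, {1, 2, 4});
    // transform 1: upper dim 1 (GemmN) -> lower dims {0, 3, 5}
    merge_decode(20, nhowo, idx_low, {0, 3, 5});

    std::printf("{%d %d %d %d %d %d}\n",
                idx_low[0], idx_low[1], idx_low[2], idx_low[3], idx_low[4], idx_low[5]);
    return 0;
}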
composable_kernel/include/utility/array.hpp

 #ifndef CK_ARRAY_HPP
 #define CK_ARRAY_HPP

-#include "sequence.hpp"
 #include "functional2.hpp"
+#include "sequence.hpp"

 namespace ck {
composable_kernel/include/utility/array_helper.hpp

@@ -288,7 +288,7 @@ reverse_exclusive_scan_on_array(const Array<TData, NSize>& x, Reduce f, TData init)
         r = f(r, x[i]);
     }

-    y(NSize - 1) = r;
+    y(0) = r;

     return y;
 }
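For context, a self-contained stand-in showing the semantics of a reverse exclusive scan with multiplication and why the final partial product belongs at index 0 (the line changed above). This helper is a sketch, not the ck implementation.

// Stand-in for reverse_exclusive_scan_on_array: y[i] = product of x[i+1..N-1] * init.
// For multiplication this yields the row-major strides of an array with lengths x.
#include <array>
#include <cstdio>

template <std::size_t N>
std::array<int, N> reverse_exclusive_scan_mul(const std::array<int, N>& x, int init)
{
    std::array<int, N> y{};
    int r = init;
    for(std::size_t k = N; k-- > 1;)
    {
        y[k] = r;
        r *= x[k];
    }
    y[0] = r; // the commit's fix: the last accumulated value goes to index 0, not NSize - 1
    return y;
}

int main()
{
    const auto y = reverse_exclusive_scan_mul(std::array<int, 4>{2, 3, 4, 5}, 1);
    std::printf("{%d, %d, %d, %d}\n", y[0], y[1], y[2], y[3]); // {60, 20, 5, 1}
    return 0;
}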
composable_kernel/include/utility/common_header.hpp

 #ifndef CK_COMMON_HEADER_HPP
 #define CK_COMMON_HEADER_HPP

-#include "config.hpp"
-#include "utility.hpp"
-#include "integral_constant.hpp"
-#include "number.hpp"
-#include "float_type.hpp"
-#include "type.hpp"
-#include "tuple.hpp"
-#include "tuple_helper.hpp"
-#include "math.hpp"
-#include "sequence.hpp"
-#include "array.hpp"
-#include "array_helper.hpp"
+#include "config.hpp"
+#include "float_type.hpp"
+#include "functional.hpp"
+#include "functional2.hpp"
+#include "functional3.hpp"
+#include "functional4.hpp"
+#include "in_memory_operation.hpp"
+#include "integral_constant.hpp"
+#include "math.hpp"
+#include "number.hpp"
+#include "sequence.hpp"
+#include "synchronization.hpp"
+#include "tuple.hpp"
+#include "tuple_helper.hpp"
+#include "type.hpp"
+#include "utility.hpp"

 #if CK_USE_AMD_INLINE_ASM
 #include "amd_inline_asm.hpp"
composable_kernel/include/utility/functional.hpp

@@ -2,7 +2,6 @@
 #define CK_FUNCTIONAL_HPP

 #include "integral_constant.hpp"
-#include "sequence.hpp"
 #include "type.hpp"

 namespace ck {

@@ -56,8 +55,10 @@ struct static_if<true>
     __host__ __device__ constexpr auto operator()(F f) const
     {
         // This is a trick for compiler:
-        //   Pass forwarder to lambda "f" as "auto" argument, and make sure "f" will use it,
-        //   this will make "f" a generic lambda, so that "f" won't be compiled until being
+        //   Pass forwarder to lambda "f" as "auto" argument, and make sure "f" will
+        //   use it,
+        //   this will make "f" a generic lambda, so that "f" won't be compiled
+        //   until being
         //   instantiated here
         f(forwarder{});
         return Type{};

@@ -84,8 +85,10 @@ struct static_if<false>
     __host__ __device__ static void Else(F f)
     {
         // This is a trick for compiler:
-        //   Pass forwarder to lambda "f" as "auto" argument, and make sure "f" will use it,
-        //   this will make "f" a generic lambda, so that "f" won't be compiled until being
+        //   Pass forwarder to lambda "f" as "auto" argument, and make sure "f" will
+        //   use it,
+        //   this will make "f" a generic lambda, so that "f" won't be compiled
+        //   until being
        //   instantiated here
         f(forwarder{});
     }
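A standalone sketch of the "generic lambda" trick that the rewrapped comment describes: because the lambda takes the forwarder as an auto parameter, its body is a template that is only instantiated when the chosen branch actually calls it. The static_if_sketch type below is a hypothetical minimal re-implementation for illustration, not ck's static_if.

// Minimal illustration of compile-time branching via template specialization plus a
// generic lambda: the dead branch's lambda body is never instantiated.
#include <cstdio>
#include <string>
#include <type_traits>

struct forwarder
{
    template <typename T>
    constexpr T&& operator()(T&& x) const { return static_cast<T&&>(x); }
};

template <bool>
struct static_if_sketch
{
    template <typename F>
    static void then(F) {} // predicate false: "f" is accepted but never called,
                           // so its generic-lambda body is never instantiated
};

template <>
struct static_if_sketch<true>
{
    template <typename F>
    static void then(F f) { f(forwarder{}); }
};

template <typename T>
void print_size(const T& x)
{
    // x.size() only needs to be valid when the branch is actually instantiated;
    // routing x through the auto parameter keeps the lambda generic.
    static_if_sketch<std::is_class<T>::value>::then(
        [&](auto fwd) { std::printf("%zu\n", fwd(x).size()); });
}

int main()
{
    print_size(42);            // false branch: lambda body never instantiated for int
    print_size(std::string{}); // prints 0
    return 0;
}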
composable_kernel/include/utility/functional3.hpp

 #ifndef CK_FUNCTIONAL3_HPP
 #define CK_FUNCTIONAL3_HPP

-#include "array.hpp"
 #include "functional.hpp"
 #include "functional2.hpp"
 #include "sequence.hpp"
+#include "array.hpp"

 namespace ck {

@@ -83,8 +83,10 @@ struct ford_impl<Sequence<>, Orders>
 } // namespace detail

-// Lengths is Sequence<...>, it is the length of each dimension for N-dimensional loop
-// Orders is Sequence<...>, it is the order of dimension in which static_ford will loop over each
+// Lengths is Sequence<...>, it is the length of each dimension for
+// N-dimensional loop
+// Orders is Sequence<...>, it is the order of dimension in which static_ford
+// will loop over each
 // dimension
 template <class Lengths,
           class Orders = typename arithmetic_sequence_gen<0, Lengths::GetSize(), 1>::type>

@@ -106,8 +108,10 @@ struct static_ford
     }
 };

-// Lengths is Sequence<...>, it is the length of each dimension for N-dimensional loop
-// Orders is Sequence<...>, it is the order of dimension in which ford will loop over each
+// Lengths is Sequence<...>, it is the length of each dimension for
+// N-dimensional loop
+// Orders is Sequence<...>, it is the order of dimension in which ford will loop
+// over each
 // dimension
 template <class Lengths,
           class Orders = typename arithmetic_sequence_gen<0, Lengths::GetSize(), 1>::type>
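A plain runtime sketch of the Lengths/Orders idea documented above: visit every index of an N-dimensional range, nesting the loops in the order given by orders (outermost first in this sketch). It illustrates the concept only and is not ck's static_ford/ford.

// Odometer-style N-dimensional loop with a configurable dimension nesting order.
#include <array>
#include <cstdio>

template <std::size_t N, typename F>
void ford_sketch(const std::array<int, N>& lengths, const std::array<int, N>& orders, F f)
{
    std::array<int, N> idx{};
    for(;;)
    {
        f(idx);
        // increment the innermost ordered dimension first, carrying outwards
        int d = static_cast<int>(N) - 1;
        for(; d >= 0; --d)
        {
            const int dim = orders[d]; // dimension iterated at nesting depth d
            if(++idx[dim] < lengths[dim]) break;
            idx[dim] = 0;
        }
        if(d < 0) return; // wrapped around every dimension
    }
}

int main()
{
    // Lengths {2, 3}, Orders {1, 0}: dimension 1 is the outer loop, dimension 0 the inner
    ford_sketch<2>({2, 3}, {1, 0},
                   [](const std::array<int, 2>& i) { std::printf("(%d, %d)\n", i[0], i[1]); });
    return 0;
}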
composable_kernel/include/utility/math.hpp

@@ -165,6 +165,6 @@ struct less
 };

 } // namespace math
-} // namspace ck
+} // namespace ck

 #endif
composable_kernel/include/utility/print_array.hpp
deleted 100644 → 0

#ifndef CK_PRINT_ARRAY_HPP
#define CK_PRINT_ARRAY_HPP

#include "array.hpp"

namespace ck {

template <index_t NSize>
__host__ __device__ void print_array(const char* s, Array<uint32_t, NSize> a)
{
    constexpr index_t nsize = a.GetSize();

    static_assert(nsize > 0 && nsize <= 10, "wrong!");

    static_if<nsize == 1>{}([&](auto) { printf("%s size %u, {%u}\n", s, nsize, a[0]); });
    static_if<nsize == 2>{}([&](auto) { printf("%s size %u, {%u %u}\n", s, nsize, a[0], a[1]); });
    static_if<nsize == 3>{}(
        [&](auto) { printf("%s size %u, {%u %u %u}\n", s, nsize, a[0], a[1], a[2]); });
    static_if<nsize == 4>{}(
        [&](auto) { printf("%s size %u, {%u %u %u %u}\n", s, nsize, a[0], a[1], a[2], a[3]); });
    static_if<nsize == 5>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u}\n", s, nsize, a[0], a[1], a[2], a[3], a[4]);
    });
    static_if<nsize == 6>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u %u}\n", s, nsize, a[0], a[1], a[2], a[3], a[4], a[5]);
    });
    static_if<nsize == 7>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u %u %u}\n",
               s, nsize, a[0], a[1], a[2], a[3], a[4], a[5], a[6]);
    });
    static_if<nsize == 8>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u %u %u %u}\n",
               s, nsize, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
    });
    static_if<nsize == 9>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u %u %u %u %u}\n",
               s, nsize, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8]);
    });
    static_if<nsize == 10>{}([&](auto) {
        printf("%s size %u, {%u %u %u %u %u %u %u %u %u %u}\n",
               s, nsize, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9]);
    });
}

template <index_t NSize>
__host__ __device__ void print_array(const char* s, Array<int32_t, NSize> a)
{
    constexpr index_t nsize = a.GetSize();

    static_assert(nsize > 0 && nsize <= 10, "wrong!");

    static_if<nsize == 1>{}([&](auto) { printf("%s size %d, {%d}\n", s, nsize, a[0]); });
    static_if<nsize == 2>{}([&](auto) { printf("%s size %d, {%d %d}\n", s, nsize, a[0], a[1]); });
    static_if<nsize == 3>{}(
        [&](auto) { printf("%s size %d, {%d %d %d}\n", s, nsize, a[0], a[1], a[2]); });
    static_if<nsize == 4>{}(
        [&](auto) { printf("%s size %d, {%d %d %d %d}\n", s, nsize, a[0], a[1], a[2], a[3]); });
    static_if<nsize == 5>{}([&](auto) {
        printf("%s size %d, {%d %d %d %d %d}\n", s, nsize, a[0], a[1], a[2], a[3], a[4]);
    });
    static_if<nsize == 6>{}([&](auto) {
        printf("%s size %d, {%d %d %d %d %d %d}\n", s, nsize, a[0], a[1], a[2], a[3], a[4], a[5]);
    });
    static_if<nsize == 7>{}([&](auto) {
        printf("%s size %d, {%d %d %d %d %d %d %d}\n",
               s, nsize, a[0], a[1], a[2], a[3], a[4], a[5], a[6]);
    });
    static_if<nsize == 8>{}([&](auto) {
        printf("%s size %d, {%d %d %d %d %d %d %d %d}\n",
               s, nsize, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7]);
    });
    static_if<nsize == 9>{}([&](auto) {
        printf("%s size %d, {%d %d %d %d %d %d %d %d %d}\n",
               s, nsize, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8]);
    });
    static_if<nsize == 10>{}([&](auto) {
        printf("%s size %d, {%d %d %d %d %d %d %d %d %d %d}\n",
               s, nsize, a[0], a[1], a[2], a[3], a[4], a[5], a[6], a[7], a[8], a[9]);
    });
}

} // namespace ck
#endif
composable_kernel/include/utility/print_sequence.hpp
deleted 100644 → 0

#ifndef CK_PRINT_SEQUENCE_HPP
#define CK_PRINT_SEQUENCE_HPP

#include "sequence.hpp"

namespace ck {

template <index_t... Xs>
__host__ __device__ void print_sequence(const char* s, Sequence<Xs...>)
{
    constexpr index_t nsize = Sequence<Xs...>::Size();

    static_assert(nsize <= 10, "wrong!");

    static_if<nsize == 0>{}([&](auto) { printf("%s size %u, {}\n", s, nsize, Xs...); });
    static_if<nsize == 1>{}([&](auto) { printf("%s size %u, {%u}\n", s, nsize, Xs...); });
    static_if<nsize == 2>{}([&](auto) { printf("%s size %u, {%u %u}\n", s, nsize, Xs...); });
    static_if<nsize == 3>{}([&](auto) { printf("%s size %u, {%u %u %u}\n", s, nsize, Xs...); });
    static_if<nsize == 4>{}([&](auto) { printf("%s size %u, {%u %u %u %u}\n", s, nsize, Xs...); });
    static_if<nsize == 5>{}(
        [&](auto) { printf("%s size %u, {%u %u %u %u %u}\n", s, nsize, Xs...); });
    static_if<nsize == 6>{}(
        [&](auto) { printf("%s size %u, {%u %u %u %u %u %u}\n", s, nsize, Xs...); });
    static_if<nsize == 7>{}(
        [&](auto) { printf("%s size %u, {%u %u %u %u %u %u %u}\n", s, nsize, Xs...); });
    static_if<nsize == 8>{}(
        [&](auto) { printf("%s size %u, {%u %u %u %u %u %u %u %u}\n", s, nsize, Xs...); });
    static_if<nsize == 9>{}(
        [&](auto) { printf("%s size %u, {%u %u %u %u %u %u %u %u %u}\n", s, nsize, Xs...); });
    static_if<nsize == 10>{}(
        [&](auto) { printf("%s size %u, {%u %u %u %u %u %u %u %u %u %u}\n", s, nsize, Xs...); });
}

} // namespace ck
#endif
composable_kernel/include/utility/tuple.hpp

@@ -2,8 +2,8 @@
 #define CK_TUPLE_HPP

 #include "integral_constant.hpp"
-#include "type.hpp"
 #include "sequence.hpp"
+#include "type.hpp"

 namespace ck {
composable_kernel/include/utility/utility.hpp

@@ -9,6 +9,6 @@ __device__ index_t get_thread_local_1d_id() { return threadIdx.x; }

 __device__ index_t get_block_1d_id() { return blockIdx.x; }

-} // namspace ck
+} // namespace ck

 #endif
driver/include/device_dummy_dynamic_transform.hpp

@@ -40,6 +40,31 @@ void device_dummy_dynamic_transform(InDesc,
     const auto in_left_pads  = to_array(InLeftPads{});
     const auto in_right_pads = to_array(InRightPads{});

+    {
+        const auto tensor_descs = map_convolution_into_gemm(wei_kcyx_desc,
+                                                            in_nchw_desc,
+                                                            out_nkhw_desc,
+                                                            conv_strides,
+                                                            conv_dilations,
+                                                            in_left_pads,
+                                                            in_right_pads);
+
+        const auto in_gemmk_gemmn_global_desc = tensor_descs.At(Number<0>{});
+
+        print_array("cpu: in_gemmk_gemmn_global_desc:", in_gemmk_gemmn_global_desc.GetLengths());
+
+        const auto idx0 = MultiIndex<2>({2591, 36991});
+
+        const auto idx1 = in_gemmk_gemmn_global_desc.CalculateLowerIndex(idx0);
+        const auto idx2 =
+            in_gemmk_gemmn_global_desc.GetLowerTensorDescriptor().CalculateLowerIndex(idx1);
+
+        const index_t offset = in_gemmk_gemmn_global_desc.CalculateOffset(idx0);
+
+        print_array("idx0:", idx0);
+        print_array("idx1:", idx1);
+        print_array("idx2:", idx2);
+        printf("offset %d\n", offset);
+    }
+
     std::size_t data_sz = sizeof(T);

     DeviceMem in_nchw_device_buf(data_sz * in_nchw.mDesc.GetElementSpace());
     DeviceMem wei_kcyx_device_buf(data_sz * wei_kcyx.mDesc.GetElementSpace());

@@ -54,6 +79,7 @@ void device_dummy_dynamic_transform(InDesc,
     printf("%s: BlockSize %u, GridSize %u\n", __func__, BlockSize, GridSize);

+#if 0
     using dummy_transform = DummyDynamicTransform<BlockSize>;

     for(index_t i = 0; i < 5; ++i)

@@ -92,6 +118,7 @@ void device_dummy_dynamic_transform(InDesc,
                                        in_right_pads);
         }
     }
+#endif

     out_nkhw_device_buf.FromDevice(out_nkhw.mData.data());
 }
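As a small aside, the final "offset" printed by the debug block above is just the native descriptor linearizing the decoded (n, c, hi, wi) coordinate with its strides. A sketch with assumed packed-NCHW sizes and an assumed coordinate (neither taken from the commit):

// Illustration of the last step of the idx0 -> idx1 -> idx2 -> offset chain:
// linearize an (n, c, hi, wi) coordinate with packed NCHW strides.
#include <cstdio>

int main()
{
    const int C = 8, Hi = 8, Wi = 8;       // assumed packed NCHW tensor
    const int n = 0, c = 4, hi = 2, wi = 3; // assumed decoded coordinate

    const int offset = ((n * C + c) * Hi + hi) * Wi + wi;

    std::printf("offset %d\n", offset); // 4*64 + 2*8 + 3 = 275
    return 0;
}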
driver/src/conv_driver.cpp

@@ -5,8 +5,7 @@
 #include <stdlib.h>
 #include <half.hpp>
 #include "config.hpp"
-#include "print_array.hpp"
-#include "print_sequence.hpp"
+#include "print.hpp"
 #include "device.hpp"
 #include "host_tensor_generator.hpp"
 #include "conv_common.hpp"

@@ -496,10 +495,10 @@ int main(int argc, char* argv[])
     ostream_tensor_descriptor(in_nchw_desc, std::cout << "in_nchw_desc: ");
     ostream_tensor_descriptor(wei_kcyx_desc, std::cout << "wei_kcyx_desc: ");
     ostream_tensor_descriptor(out_nkhw_desc, std::cout << "out_nkhw_desc: ");
-    print_sequence("LeftPads", LeftPads{});
-    print_sequence("RightPads", RightPads{});
-    print_sequence("ConvStrides", ConvStrides{});
-    print_sequence("ConvDilations", ConvDilations{});
+    print_array("LeftPads", LeftPads{});
+    print_array("RightPads", RightPads{});
+    print_array("ConvStrides", ConvStrides{});
+    print_array("ConvDilations", ConvDilations{});

 #if 1
     using in_data_t = float;