Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
b23e7f8e
Commit
b23e7f8e
authored
Sep 11, 2020
by
Chao Liu
Browse files
dynamic tensor descriptor v2 can produce correct result, but spill too many register
parent
0a944e8f
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
65 additions
and
12 deletions
+65
-12
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
...ensor_description/dynamic_tensor_descriptor_helper_v2.hpp
+2
-2
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
...clude/tensor_description/dynamic_tensor_descriptor_v2.hpp
+8
-1
composable_kernel/include/utility/array.hpp
composable_kernel/include/utility/array.hpp
+12
-0
composable_kernel/include/utility/array_helper.hpp
composable_kernel/include/utility/array_helper.hpp
+6
-0
composable_kernel/include/utility/print.hpp
composable_kernel/include/utility/print.hpp
+33
-2
driver/include/device_dummy_dynamic_transform.hpp
driver/include/device_dummy_dynamic_transform.hpp
+4
-7
No files found.
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_helper_v2.hpp
View file @
b23e7f8e
...
@@ -15,7 +15,7 @@ make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
...
@@ -15,7 +15,7 @@ make_dynamic_native_tensor_descriptor_packed_v2(const MultiIndex<N>& lengths)
constexpr
auto
low_dim_hidden_idss
=
make_tuple
(
Sequence
<
0
>
{});
constexpr
auto
low_dim_hidden_idss
=
make_tuple
(
Sequence
<
0
>
{});
constexpr
auto
up_dim_hidden_idss
=
constexpr
auto
up_dim_hidden_idss
=
make_tuple
(
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{});
make_tuple
(
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{});
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
0
,
N
,
1
>::
type
{};
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{};
const
index_t
element_space_size
=
const
index_t
element_space_size
=
reduce_on_array
(
lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
reduce_on_array
(
lengths
,
math
::
multiplies
<
index_t
>
{},
index_t
{
1
});
...
@@ -37,7 +37,7 @@ make_dynamic_native_tensor_descriptor_v2(const MultiIndex<N>& lengths, const Mul
...
@@ -37,7 +37,7 @@ make_dynamic_native_tensor_descriptor_v2(const MultiIndex<N>& lengths, const Mul
constexpr
auto
low_dim_hidden_idss
=
make_tuple
(
Sequence
<
0
>
{});
constexpr
auto
low_dim_hidden_idss
=
make_tuple
(
Sequence
<
0
>
{});
constexpr
auto
up_dim_hidden_idss
=
constexpr
auto
up_dim_hidden_idss
=
make_tuple
(
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{});
make_tuple
(
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{});
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
0
,
N
,
1
>::
type
{};
constexpr
auto
visible_dim_hidden_ids
=
typename
arithmetic_sequence_gen
<
1
,
N
+
1
,
1
>::
type
{};
index_t
element_space_size
=
1
;
index_t
element_space_size
=
1
;
...
...
composable_kernel/include/tensor_description/dynamic_tensor_descriptor_v2.hpp
View file @
b23e7f8e
...
@@ -282,6 +282,8 @@ struct DynamicTensorCoordinateStep_v2
...
@@ -282,6 +282,8 @@ struct DynamicTensorCoordinateStep_v2
{
{
}
}
// Accessor that forwards to GetVisibleIndexDiff() and returns its result
// unchanged, as a const reference. No computation is performed here.
__host__
__device__
constexpr
const
auto
&
GetIndexDiff
()
const
{
return
GetVisibleIndexDiff
();
}
// private:
// private:
__host__
__device__
constexpr
const
auto
&
GetVisibleIndexDiff
()
const
__host__
__device__
constexpr
const
auto
&
GetVisibleIndexDiff
()
const
{
{
...
@@ -510,7 +512,12 @@ __host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& ten
...
@@ -510,7 +512,12 @@ __host__ __device__ void move_dynamic_tensor_coordinate_v2(const TensorDesc& ten
// this is what needs to be updated
// this is what needs to be updated
auto
&
idx_hidden
=
coord
.
GetHiddenIndex
();
auto
&
idx_hidden
=
coord
.
GetHiddenIndex
();
// update hidden index
// update visible index
auto
idx_hidden_pick_visible
=
pick_array_element
(
idx_hidden
,
TensorDesc
::
GetVisibleDimensionIds
());
idx_hidden_pick_visible
+=
coord_step
.
GetIndexDiff
();
// update rest of hidden index
static_for
<
ntransform
-
1
,
-
1
,
-
1
>
{}([
&
](
auto
itran
)
{
static_for
<
ntransform
-
1
,
-
1
,
-
1
>
{}([
&
](
auto
itran
)
{
const
auto
&
tran
=
tensor_desc
.
GetTransforms
().
At
(
itran
);
const
auto
&
tran
=
tensor_desc
.
GetTransforms
().
At
(
itran
);
constexpr
auto
dims_low
=
TensorDesc
::
GetLowerDimensionIdss
().
At
(
itran
);
constexpr
auto
dims_low
=
TensorDesc
::
GetLowerDimensionIdss
().
At
(
itran
);
...
...
composable_kernel/include/utility/array.hpp
View file @
b23e7f8e
...
@@ -147,6 +147,18 @@ struct Array
...
@@ -147,6 +147,18 @@ struct Array
return
new_array
;
return
new_array
;
}
}
// Returns a new Array of NSize + NAppend elements: the elements of this
// array first, followed by the elements of `xs`. Neither input is modified.
template <index_t NAppend>
__host__ __device__ constexpr auto Append(const Array<TData, NAppend>& xs) const
{
    const auto& head = *this;

    Array<TData, NSize + NAppend> joined;

    // Slots [0, NSize) are copied from this array.
    static_for<0, NSize, 1>{}([&](auto pos) constexpr { joined(pos) = head[pos]; });

    // Slots [NSize, NSize + NAppend) are copied from xs.
    static_for<0, NAppend, 1>{}([&](auto pos) constexpr { joined(NSize + pos) = xs[pos]; });

    return joined;
}
};
};
// Arr: Array
// Arr: Array
...
...
composable_kernel/include/utility/array_helper.hpp
View file @
b23e7f8e
...
@@ -5,6 +5,12 @@
...
@@ -5,6 +5,12 @@
namespace
ck
{
namespace
ck
{
// Builds an Array from one or more values. The element type is the type of
// the first argument (X) and the size equals the number of arguments passed.
template <typename X, typename... Xs>
__host__ __device__ constexpr auto make_array(const X& x, const Xs&... xs)
{
    using result_type = Array<X, 1 + sizeof...(Xs)>;

    return result_type{{x, xs...}};
}
template
<
typename
Arr
,
typename
Picks
>
template
<
typename
Arr
,
typename
Picks
>
__host__
__device__
constexpr
auto
pick_array_element
(
Arr
&
a
,
Picks
)
__host__
__device__
constexpr
auto
pick_array_element
(
Arr
&
a
,
Picks
)
{
{
...
...
composable_kernel/include/utility/print.hpp
View file @
b23e7f8e
...
@@ -2,6 +2,7 @@
...
@@ -2,6 +2,7 @@
#define CK_PRINT_HPP
#define CK_PRINT_HPP
#include "array.hpp"
#include "array.hpp"
#include "array_helper.hpp"
#include "sequence.hpp"
#include "sequence.hpp"
namespace
ck
{
namespace
ck
{
...
@@ -12,8 +13,6 @@ __host__ __device__ void print_array(const char* s, T a)
...
@@ -12,8 +13,6 @@ __host__ __device__ void print_array(const char* s, T a)
using
data_type
=
typename
decltype
(
a
)
::
data_type
;
using
data_type
=
typename
decltype
(
a
)
::
data_type
;
constexpr
index_t
nsize
=
a
.
Size
();
constexpr
index_t
nsize
=
a
.
Size
();
static_assert
(
nsize
>=
0
&&
nsize
<=
10
,
"wrong!"
);
if
constexpr
(
is_same
<
data_type
,
uint32_t
>
{})
if
constexpr
(
is_same
<
data_type
,
uint32_t
>
{})
{
{
if
constexpr
(
nsize
==
0
)
if
constexpr
(
nsize
==
0
)
...
@@ -103,6 +102,12 @@ __host__ __device__ void print_array(const char* s, T a)
...
@@ -103,6 +102,12 @@ __host__ __device__ void print_array(const char* s, T a)
a
[
8
],
a
[
8
],
a
[
9
]);
a
[
9
]);
}
}
else
{
printf
(
"%s size %u, {"
,
s
,
nsize
);
static_for
<
0
,
nsize
,
1
>
{}([
&
a
](
auto
i
)
constexpr
{
printf
(
"%u, "
,
a
[
i
]);
});
printf
(
"}
\n
"
);
}
}
}
else
if
constexpr
(
is_same
<
data_type
,
int32_t
>
{})
else
if
constexpr
(
is_same
<
data_type
,
int32_t
>
{})
{
{
...
@@ -193,6 +198,32 @@ __host__ __device__ void print_array(const char* s, T a)
...
@@ -193,6 +198,32 @@ __host__ __device__ void print_array(const char* s, T a)
a
[
8
],
a
[
8
],
a
[
9
]);
a
[
9
]);
}
}
else
{
printf
(
"%s size %d, {"
,
s
,
nsize
);
static_for
<
0
,
nsize
,
1
>
{}([
&
a
](
auto
i
)
constexpr
{
printf
(
"%d, "
,
a
[
i
]);
});
printf
(
"}
\n
"
);
}
}
}
// Prints array `a` with printf: first the label `s` and the element count,
// then every element as "[index] value, ", then a closing brace and newline.
// Only element types int32_t and uint32_t produce output; for any other
// data_type the function prints nothing.
template <typename T>
__host__ __device__ void print_array_v2(const char* s, T a)
{
    using element_type = typename T::data_type;

    constexpr index_t count = a.Size();

    if constexpr(is_same<element_type, int32_t>{})
    {
        // Signed elements: everything is formatted with %d.
        printf("%s size %d, {", s, count);

        static_for<0, count, 1>{}(
            [&](auto pos) constexpr { printf("[%d] %d, ", pos.value, a[pos]); });

        printf("}\n");
    }
    else if constexpr(is_same<element_type, uint32_t>{})
    {
        // Unsigned elements: everything is formatted with %u.
        printf("%s size %u, {", s, count);

        static_for<0, count, 1>{}(
            [&](auto pos) constexpr { printf("[%u] %u, ", pos.value, a[pos]); });

        printf("}\n");
    }
}
}
}
...
...
driver/include/device_dummy_dynamic_transform.hpp
View file @
b23e7f8e
...
@@ -54,7 +54,7 @@ void device_dummy_dynamic_transform(InDesc,
...
@@ -54,7 +54,7 @@ void device_dummy_dynamic_transform(InDesc,
auto
in_gemmk_gemmn_coord
=
auto
in_gemmk_gemmn_coord
=
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
MultiIndex
<
2
>
{
0
,
0
});
make_dynamic_tensor_coordinate
(
in_gemmk_gemmn_global_desc
,
MultiIndex
<
2
>
{
0
,
0
});
for
(
index_t
iter
=
0
;
iter
<
10
0
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
10
;
++
iter
)
{
{
constexpr
auto
gemmk1_gemmn0
=
MultiIndex
<
2
>
{
1
,
0
};
constexpr
auto
gemmk1_gemmn0
=
MultiIndex
<
2
>
{
1
,
0
};
...
@@ -190,17 +190,14 @@ void device_dummy_dynamic_transform_v2(InDesc,
...
@@ -190,17 +190,14 @@ void device_dummy_dynamic_transform_v2(InDesc,
make_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
MultiIndex
<
2
>
{
0
,
0
});
make_dynamic_tensor_coordinate_v2
(
in_gemmk_gemmn_global_desc
,
MultiIndex
<
2
>
{
0
,
0
});
const
auto
in_gemmk_gemmn_coord_step
=
const
auto
in_gemmk_gemmn_coord_step
=
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_global_desc
,
MultiIndex
<
2
>
{
1
,
0
});
make_dynamic_tensor_coordinate_step_v2
(
in_gemmk_gemmn_global_desc
,
MultiIndex
<
2
>
{
0
,
1
});
for
(
index_t
iter
=
0
;
iter
<
100
;
++
iter
)
for
(
index_t
iter
=
0
;
iter
<
100
;
++
iter
)
{
{
constexpr
auto
gemmk1_gemmn0
=
MultiIndex
<
2
>
{
1
,
0
};
printf
(
"iter %d
\n
"
,
iter
);
printf
(
"iter %d
\n
"
,
iter
);
print_array_v2
(
"visible idx: "
,
in_gemmk_gemmn_coord
.
GetIndex
());
print_array
(
"
idx: "
,
in_gemmk_gemmn_coord
.
GetIndex
());
print_array
_v2
(
"hidden
idx: "
,
in_gemmk_gemmn_coord
.
Get
Hidden
Index
());
printf
(
"offset: %d
\n
"
,
in_gemmk_gemmn_coord
.
GetOffset
());
printf
(
"offset: %d
\n
"
,
in_gemmk_gemmn_coord
.
GetOffset
());
printf
(
"
\n
"
);
printf
(
"
\n
"
);
move_dynamic_tensor_coordinate_v2
(
move_dynamic_tensor_coordinate_v2
(
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment