Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
32d485dd
Commit
32d485dd
authored
Apr 23, 2021
by
Chao Liu
Browse files
refactor DynamicBuffer
parent
b6e43b25
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
13 additions
and
30 deletions
+13
-30
composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_transfer.hpp
...or_operation/threadwise_dynamic_tensor_slice_transfer.hpp
+1
-12
composable_kernel/include/utility/buffer.hpp
composable_kernel/include/utility/buffer.hpp
+8
-14
composable_kernel/include/utility/config.amd.hpp.in
composable_kernel/include/utility/config.amd.hpp.in
+1
-1
driver/src/conv_driver.cpp
driver/src/conv_driver.cpp
+3
-3
No files found.
composable_kernel/include/tensor_operation/threadwise_dynamic_tensor_slice_transfer.hpp
View file @
32d485dd
...
@@ -1487,20 +1487,9 @@ struct ThreadwiseDynamicTensorSliceTransfer_v4
...
@@ -1487,20 +1487,9 @@ struct ThreadwiseDynamicTensorSliceTransfer_v4
const
bool
is_src_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
const
bool
is_src_valid
=
coordinate_has_valid_offset_assuming_visible_index_is_valid
(
src_desc
,
src_data_coord
);
src_desc
,
src_data_coord
);
#if 0
// TODO: this is slooooooooow due to VGPR over-allocation
src_tmp_vector
.
template
AsType
<
src_vector_t
>()(
Number
<
0
>
{})
=
src_tmp_vector
.
template
AsType
<
src_vector_t
>()(
Number
<
0
>
{})
=
is_src_valid ? src_buf.template AsType<src_vector_t>()[src_data_coord.GetOffset() /
is_src_valid
?
src_buf
.
template
Get
<
src_vector_t
>(
src_data_coord
.
GetOffset
())
SrcScalarPerVector]
:
src_vector_t
{
0
};
:
src_vector_t
{
0
};
#else
// TODO: this is workaround. this has normal performance but it's hacky
src_tmp_vector
.
template
AsType
<
src_vector_t
>()(
Number
<
0
>
{})
=
is_src_valid
?
*
reinterpret_cast
<
const
src_vector_t
*>
(
&
(
reinterpret_cast
<
const
SrcData
*>
(
src_buf
.
p_data_
)[
src_data_coord
.
GetOffset
()]))
:
src_vector_t
{
0
};
#endif
// copy data from src_tmp_vector to dst_tmp_vector (data cast data from SrcData to
// copy data from src_tmp_vector to dst_tmp_vector (data cast data from SrcData to
// DstData)
// DstData)
...
...
composable_kernel/include/utility/buffer.hpp
View file @
32d485dd
...
@@ -29,28 +29,22 @@ struct DynamicBuffer
...
@@ -29,28 +29,22 @@ struct DynamicBuffer
{
{
using
type
=
T
;
using
type
=
T
;
template
<
typename
X
>
struct
PointerWrapper
{
X
*
p_
;
__host__
__device__
constexpr
const
X
&
operator
[](
index_t
i
)
const
{
return
p_
[
i
];
}
__host__
__device__
constexpr
X
&
operator
()(
index_t
i
)
{
return
p_
[
i
];
}
};
T
*
p_data_
;
T
*
p_data_
;
__host__
__device__
constexpr
DynamicBuffer
(
T
*
p_data
)
:
p_data_
{
p_data
}
{}
__host__
__device__
constexpr
DynamicBuffer
(
T
*
p_data
)
:
p_data_
{
p_data
}
{}
__host__
__device__
constexpr
const
T
&
operator
[](
index_t
i
)
const
{
return
p_data_
[
i
];
}
__host__
__device__
constexpr
T
&
operator
()(
index_t
i
)
{
return
p_data_
[
i
];
}
template
<
typename
X
,
template
<
typename
X
,
typename
std
::
enable_if
<
typename
std
::
enable_if
<
is_same
<
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
X
>
>>::
type
,
is_same
<
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
X
>
>>::
type
,
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
T
>>>::
type
>::
value
,
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
T
>>>::
type
>::
value
,
bool
>::
type
=
false
>
bool
>::
type
=
false
>
__host__
__device__
constexpr
const
auto
AsType
(
)
const
__host__
__device__
constexpr
const
auto
Get
(
index_t
i
)
const
{
{
return
PointerWrapper
<
X
>
{
reinterpret_cast
<
X
*>
(
p_data_
)
}
;
return
*
reinterpret_cast
<
const
X
*>
(
&
p_data_
[
i
]
);
}
}
template
<
typename
X
,
template
<
typename
X
,
...
@@ -58,9 +52,9 @@ struct DynamicBuffer
...
@@ -58,9 +52,9 @@ struct DynamicBuffer
is_same
<
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
X
>
>>::
type
,
is_same
<
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
X
>
>>::
type
,
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
T
>>>::
type
>::
value
,
typename
scalar_type
<
remove_cv_t
<
remove_reference_t
<
T
>>>::
type
>::
value
,
bool
>::
type
=
false
>
bool
>::
type
=
false
>
__host__
__device__
constexpr
auto
AsType
(
)
__host__
__device__
void
Set
(
index_t
i
,
const
X
&
x
)
{
{
return
PointerWrapper
<
X
>
{
reinterpret_cast
<
X
*>
(
p_data_
)}
;
*
reinterpret_cast
<
X
*>
(
&
p_data_
[
i
])
=
x
;
}
}
__host__
__device__
static
constexpr
bool
IsStaticBuffer
()
{
return
false
;
}
__host__
__device__
static
constexpr
bool
IsStaticBuffer
()
{
return
false
;
}
...
...
composable_kernel/include/utility/config.amd.hpp.in
View file @
32d485dd
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
#define CK_DEVICE_BACKEND_AMD 1
#define CK_DEVICE_BACKEND_AMD 1
// GPU ID
// GPU ID
#if
0
#if
1
#define CK_AMD_GPU_GFX906 1
#define CK_AMD_GPU_GFX906 1
#elif 0
#elif 0
#define CK_AMD_GPU_GFX908 1
#define CK_AMD_GPU_GFX908 1
...
...
driver/src/conv_driver.cpp
View file @
32d485dd
...
@@ -64,7 +64,7 @@ int main(int argc, char* argv[])
...
@@ -64,7 +64,7 @@ int main(int argc, char* argv[])
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
LeftPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
using
RightPads
=
Sequence
<
0
,
0
>
;
#elif
1
#elif
0
constexpr
index_t
N
=
1
;
constexpr
index_t
N
=
1
;
constexpr
index_t
C
=
16
;
constexpr
index_t
C
=
16
;
constexpr
index_t
HI
=
1080
;
constexpr
index_t
HI
=
1080
;
...
@@ -630,7 +630,7 @@ int main(int argc, char* argv[])
...
@@ -630,7 +630,7 @@ int main(int argc, char* argv[])
print_array
(
"ConvStrides"
,
to_multi_index
(
ConvStrides
{}));
print_array
(
"ConvStrides"
,
to_multi_index
(
ConvStrides
{}));
print_array
(
"ConvDilations"
,
to_multi_index
(
ConvDilations
{}));
print_array
(
"ConvDilations"
,
to_multi_index
(
ConvDilations
{}));
#if
0
#if
1
using
in_data_t
=
float
;
using
in_data_t
=
float
;
constexpr
index_t
in_vector_size
=
1
;
constexpr
index_t
in_vector_size
=
1
;
using
acc_data_t
=
float
;
using
acc_data_t
=
float
;
...
@@ -724,7 +724,7 @@ int main(int argc, char* argv[])
...
@@ -724,7 +724,7 @@ int main(int argc, char* argv[])
LeftPads
{},
LeftPads
{},
RightPads
{},
RightPads
{},
nrepeat
);
nrepeat
);
#elif
0
#elif
1
device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw
<
in_data_t
,
device_dynamic_convolution_forward_implicit_gemm_v4r4_nchw_kcyx_nkhw
<
in_data_t
,
in_vector_size
,
in_vector_size
,
acc_data_t
,
acc_data_t
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment