Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
07af8343
Commit
07af8343
authored
Apr 14, 2022
by
carlushuang
Browse files
fix compile error after merge develop
parent
07a673c6
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
23 additions
and
23 deletions
+23
-23
include/ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2.hpp
...tion/cpu/thread/threadwise_tensor_slice_transfer_avx2.hpp
+1
-1
include/ck/utility/dynamic_buffer_cpu.hpp
include/ck/utility/dynamic_buffer_cpu.hpp
+8
-8
test/cpu_threadwise_transfer/cpu_threadwise_transfer.cpp
test/cpu_threadwise_transfer/cpu_threadwise_transfer.cpp
+14
-14
No files found.
include/ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2.hpp
View file @
07af8343
...
@@ -29,7 +29,7 @@ template <typename SrcData,
...
@@ -29,7 +29,7 @@ template <typename SrcData,
index_t
VectorDim
,
index_t
VectorDim
,
index_t
ScalarPerVector
,
// src/dst must use same vector size, aka src/dst both need same
index_t
ScalarPerVector
,
// src/dst must use same vector size, aka src/dst both need same
// avx/float register
// avx/float register
InMemoryDataOperationEnum
_t
DstInMemOp
,
InMemoryDataOperationEnum
DstInMemOp
,
bool
SrcResetCoordinateAfterRun
,
bool
SrcResetCoordinateAfterRun
,
bool
DstResetCoordinateAfterRun
>
bool
DstResetCoordinateAfterRun
>
struct
ThreadwiseTensorSliceTransferAvx2
struct
ThreadwiseTensorSliceTransferAvx2
...
...
include/ck/utility/dynamic_buffer_cpu.hpp
View file @
07af8343
...
@@ -8,7 +8,7 @@
...
@@ -8,7 +8,7 @@
namespace
ck
{
namespace
ck
{
namespace
cpu
{
namespace
cpu
{
template
<
AddressSpaceEnum
_t
BufferAddressSpace
,
template
<
AddressSpaceEnum
BufferAddressSpace
,
typename
T
,
typename
T
,
typename
ElementSpaceSize
,
typename
ElementSpaceSize
,
bool
InvalidElementUseNumericalZeroValue
>
bool
InvalidElementUseNumericalZeroValue
>
...
@@ -17,7 +17,7 @@ struct DynamicBuffer
...
@@ -17,7 +17,7 @@ struct DynamicBuffer
using
type
=
T
;
using
type
=
T
;
static_assert
(
BufferAddressSpace
==
static_assert
(
BufferAddressSpace
==
AddressSpaceEnum
_t
::
Global
);
// only valid for global address space on cpu
AddressSpaceEnum
::
Global
);
// only valid for global address space on cpu
T
*
p_data_
;
T
*
p_data_
;
ElementSpaceSize
element_space_size_
;
ElementSpaceSize
element_space_size_
;
...
@@ -35,7 +35,7 @@ struct DynamicBuffer
...
@@ -35,7 +35,7 @@ struct DynamicBuffer
{
{
}
}
static
constexpr
AddressSpaceEnum
_t
GetAddressSpace
()
{
return
BufferAddressSpace
;
}
static
constexpr
AddressSpaceEnum
GetAddressSpace
()
{
return
BufferAddressSpace
;
}
constexpr
const
T
&
operator
[](
index_t
i
)
const
{
return
p_data_
[
i
];
}
constexpr
const
T
&
operator
[](
index_t
i
)
const
{
return
p_data_
[
i
];
}
...
@@ -68,18 +68,18 @@ struct DynamicBuffer
...
@@ -68,18 +68,18 @@ struct DynamicBuffer
}
}
}
}
template
<
InMemoryDataOperationEnum
_t
Op
,
template
<
InMemoryDataOperationEnum
Op
,
typename
X
,
typename
X
,
typename
enable_if
<
is_same
<
typename
scalar_type
<
remove_cvref_t
<
X
>
>::
type
,
typename
enable_if
<
is_same
<
typename
scalar_type
<
remove_cvref_t
<
X
>
>::
type
,
typename
scalar_type
<
remove_cvref_t
<
T
>>::
type
>::
value
,
typename
scalar_type
<
remove_cvref_t
<
T
>>::
type
>::
value
,
bool
>::
type
=
false
>
bool
>::
type
=
false
>
void
Update
(
index_t
i
,
bool
is_valid_element
,
const
X
&
x
)
void
Update
(
index_t
i
,
bool
is_valid_element
,
const
X
&
x
)
{
{
if
constexpr
(
Op
==
InMemoryDataOperationEnum
_t
::
Set
)
if
constexpr
(
Op
==
InMemoryDataOperationEnum
::
Set
)
{
{
this
->
template
Set
<
X
>(
i
,
is_valid_element
,
x
);
this
->
template
Set
<
X
>(
i
,
is_valid_element
,
x
);
}
}
else
if
constexpr
(
Op
==
InMemoryDataOperationEnum
_t
::
Add
)
else
if
constexpr
(
Op
==
InMemoryDataOperationEnum
::
Add
)
{
{
auto
tmp
=
this
->
template
Get
<
X
>(
i
,
is_valid_element
);
auto
tmp
=
this
->
template
Get
<
X
>(
i
,
is_valid_element
);
this
->
template
Set
<
X
>(
i
,
is_valid_element
,
x
+
tmp
);
this
->
template
Set
<
X
>(
i
,
is_valid_element
,
x
+
tmp
);
...
@@ -111,14 +111,14 @@ struct DynamicBuffer
...
@@ -111,14 +111,14 @@ struct DynamicBuffer
static
constexpr
bool
IsDynamicBuffer
()
{
return
true
;
}
static
constexpr
bool
IsDynamicBuffer
()
{
return
true
;
}
};
};
template
<
AddressSpaceEnum
_t
BufferAddressSpace
,
typename
T
,
typename
ElementSpaceSize
>
template
<
AddressSpaceEnum
BufferAddressSpace
,
typename
T
,
typename
ElementSpaceSize
>
constexpr
auto
make_dynamic_buffer
(
T
*
p
,
ElementSpaceSize
element_space_size
)
constexpr
auto
make_dynamic_buffer
(
T
*
p
,
ElementSpaceSize
element_space_size
)
{
{
return
DynamicBuffer
<
BufferAddressSpace
,
T
,
ElementSpaceSize
,
true
>
{
p
,
element_space_size
};
return
DynamicBuffer
<
BufferAddressSpace
,
T
,
ElementSpaceSize
,
true
>
{
p
,
element_space_size
};
}
}
template
<
template
<
AddressSpaceEnum
_t
BufferAddressSpace
,
AddressSpaceEnum
BufferAddressSpace
,
typename
T
,
typename
T
,
typename
ElementSpaceSize
,
typename
ElementSpaceSize
,
typename
X
,
typename
X
,
...
...
test/cpu_threadwise_transfer/cpu_threadwise_transfer.cpp
View file @
07af8343
...
@@ -209,18 +209,18 @@ int main(int argc, char** argv)
...
@@ -209,18 +209,18 @@ int main(int argc, char** argv)
};
};
using
threadwise_transfer_t
=
ck
::
cpu
::
ThreadwiseTensorSliceTransferAvx2
<
using
threadwise_transfer_t
=
ck
::
cpu
::
ThreadwiseTensorSliceTransferAvx2
<
AType
,
// SrcData
AType
,
// SrcData
AType
,
// DstData
AType
,
// DstData
decltype
(
input_desc
),
// SrcDesc
decltype
(
input_desc
),
// SrcDesc
decltype
(
input_cblock_desc
),
// DstDesc
decltype
(
input_cblock_desc
),
// DstDesc
PassThrough
,
// ElementwiseOperation
PassThrough
,
// ElementwiseOperation
decltype
(
get_slice_length
()),
// SliceLengths
decltype
(
get_slice_length
()),
// SliceLengths
decltype
(
get_dim_access_order
()),
// DimAccessOrder
decltype
(
get_dim_access_order
()),
// DimAccessOrder
1
,
// VectorDim
1
,
// VectorDim
1
,
// ScalarPerVector
1
,
// ScalarPerVector
ck
::
InMemoryDataOperationEnum
_t
::
Set
,
// InMemoryDataOperationEnum
_t
ck
::
InMemoryDataOperationEnum
::
Set
,
// InMemoryDataOperationEnum
false
,
// SrcResetCoordinateAfterRun
false
,
// SrcResetCoordinateAfterRun
true
// DstResetCoordinateAfterRun
true
// DstResetCoordinateAfterRun
>
;
>
;
static
constexpr
ck
::
index_t
nDim
=
static
constexpr
ck
::
index_t
nDim
=
...
@@ -232,10 +232,10 @@ int main(int argc, char** argv)
...
@@ -232,10 +232,10 @@ int main(int argc, char** argv)
ck
::
make_zero_multi_index
<
nDim
>
(),
ck
::
make_zero_multi_index
<
nDim
>
(),
PassThrough
{}};
PassThrough
{}};
auto
input_buf
=
ck
::
cpu
::
make_dynamic_buffer
<
ck
::
AddressSpaceEnum
_t
::
Global
>
(
auto
input_buf
=
ck
::
cpu
::
make_dynamic_buffer
<
ck
::
AddressSpaceEnum
::
Global
>
(
static_cast
<
AType
*>
(
input_mem
.
mpDeviceBuf
),
input_mem
.
mMemSize
/
sizeof
(
AType
));
static_cast
<
AType
*>
(
input_mem
.
mpDeviceBuf
),
input_mem
.
mMemSize
/
sizeof
(
AType
));
auto
input_cblock
=
ck
::
cpu
::
make_dynamic_buffer
<
ck
::
AddressSpaceEnum
_t
::
Global
>
(
auto
input_cblock
=
ck
::
cpu
::
make_dynamic_buffer
<
ck
::
AddressSpaceEnum
::
Global
>
(
static_cast
<
AType
*>
(
input_cblock_mem
.
mpDeviceBuf
),
static_cast
<
AType
*>
(
input_cblock_mem
.
mpDeviceBuf
),
input_cblock_mem
.
mMemSize
/
sizeof
(
AType
));
input_cblock_mem
.
mMemSize
/
sizeof
(
AType
));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment