Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
ad0a4ce1
Commit
ad0a4ce1
authored
May 19, 2022
by
carlushuang
Browse files
fix a bug in gcc compile
parent
ddad386b
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
11 additions
and
18 deletions
+11
-18
include/ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp
.../threadwise_tensor_slice_transfer_avx2_specialization.hpp
+11
-18
No files found.
include/ck/tensor_operation/cpu/thread/threadwise_tensor_slice_transfer_avx2_specialization.hpp
View file @
ad0a4ce1
...
@@ -116,19 +116,15 @@ void memcpy32_avx2_with_extra_2src(void* dst,
...
@@ -116,19 +116,15 @@ void memcpy32_avx2_with_extra_2src(void* dst,
if
(
i_n
&
2
)
if
(
i_n
&
2
)
{
{
#if defined(__GNUC__) && !defined(__clang__) && !defined(__llvm__)
#if defined(__GNUC__) && !defined(__clang__) && !defined(__llvm__)
__m128i
s
=
_mm_loadu_si64
(
p_src
);
__m128i
s
=
_mm_loadu_si64
(
p_src
);
__m128
v
=
element_op
.
Apply
(
*
reinterpret_cast
<
__m128
*>
(
&
s
));
__m128i
s1
=
_mm_loadu_si64
(
p_src1
);
__m128i
s1
=
_mm_loadu_si64
(
p_src1
);
__m128
v1
=
element_op
.
Apply
(
*
reinterpret_cast
<
__m128
*>
(
&
s1
));
__m128i
s2
=
_mm_loadu_si64
(
p_src2
);
__m128i
s2
=
_mm_loadu_si64
(
p_src2
);
__m128
v2
=
element_op
.
Apply
(
*
reinterpret_cast
<
__m128
*>
(
&
s2
));
_mm_storeu_si64
(
p_dst
,
__m128
v
=
element_op
.
Apply
(
*
reinterpret_cast
<
__m128
*>
(
&
s
),
*
reinterpret_cast
<
__m128i
*>
(
&
v
),
*
reinterpret_cast
<
__m128
*>
(
&
s1
),
*
reinterpret_cast
<
__m128i
*>
(
&
v1
),
*
reinterpret_cast
<
__m128
*>
(
&
s2
));
*
reinterpret_cast
<
__m128i
*>
(
&
v2
));
_mm_storeu_si64
(
p_dst
,
*
reinterpret_cast
<
__m128i
*>
(
&
v
));
#else
#else
_mm_storeu_si64
(
p_dst
,
_mm_storeu_si64
(
p_dst
,
element_op
.
Apply
(
element_op
.
Apply
(
...
@@ -193,16 +189,13 @@ void memcpy32_avx2_with_extra_2src(void* dst,
...
@@ -193,16 +189,13 @@ void memcpy32_avx2_with_extra_2src(void* dst,
if
(
i_n
&
2
)
if
(
i_n
&
2
)
{
{
#if defined(__GNUC__) && !defined(__clang__) && !defined(__llvm__)
#if defined(__GNUC__) && !defined(__clang__) && !defined(__llvm__)
__m128i
s
=
_mm_loadu_si64
(
p_src
);
__m128i
s
=
_mm_loadu_si64
(
p_src
);
__m128
v
=
element_op
.
Apply
(
*
reinterpret_cast
<
__m128
*>
(
&
s
));
__m128i
s2
=
_mm_loadu_si64
(
p_src2
);
__m128i
s2
=
_mm_loadu_si64
(
p_src2
);
__m128
v2
=
element_op
.
Apply
(
*
reinterpret_cast
<
__m128
*>
(
&
s2
));
_
mm_storeu_si64
(
p_dst
,
_
_m128
v
=
element_op
.
Apply
(
*
reinterpret_cast
<
__m128
i
*>
(
&
v
),
*
reinterpret_cast
<
__m128
*>
(
&
s
),
xmm_src1
,
*
reinterpret_cast
<
__m128
*>
(
&
s2
));
*
reinterpret_cast
<
__m128i
*>
(
&
xmm_src1
),
*
reinterpret_cast
<
__m128i
*>
(
&
v
2
));
_mm_storeu_si64
(
p_dst
,
*
reinterpret_cast
<
__m128i
*>
(
&
v
));
#else
#else
_mm_storeu_si64
(
p_dst
,
_mm_storeu_si64
(
p_dst
,
element_op
.
Apply
(
_mm_loadu_si64
(
p_src
),
xmm_src1
,
_mm_loadu_si64
(
p_src2
)));
element_op
.
Apply
(
_mm_loadu_si64
(
p_src
),
xmm_src1
,
_mm_loadu_si64
(
p_src2
)));
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment