Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
569941a7
Commit
569941a7
authored
Apr 03, 2019
by
Chao Liu
Browse files
create mini code
parent
6166233e
Changes
22
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
0 additions
and
174 deletions
+0
-174
src/include/threadwise_gemm.hip.hpp
src/include/threadwise_gemm.hip.hpp
+0
-10
src/include/threadwise_nd_tensor_op.hip.hpp
src/include/threadwise_nd_tensor_op.hip.hpp
+0
-164
No files found.
src/include/threadwise_gemm.hip.hpp
View file @
569941a7
...
@@ -78,17 +78,7 @@ __device__ void threadwise_gemm(MatrixA,
...
@@ -78,17 +78,7 @@ __device__ void threadwise_gemm(MatrixA,
const
index_t
bindex
=
b_mtx
.
Get1dIndex
(
k
,
j
);
const
index_t
bindex
=
b_mtx
.
Get1dIndex
(
k
,
j
);
const
index_t
cindex
=
c_mtx
.
Get1dIndex
(
i
,
j
);
const
index_t
cindex
=
c_mtx
.
Get1dIndex
(
i
,
j
);
#if DEVICE_BACKEND_HIP // this only does c += a * b
asm
volatile
(
"
\n
\
v_mac_f32 %0, %1, %2
\n
\
"
:
"=v"
(
p_c_thread
[
cindex
])
:
"v"
(
p_a_thread
[
aindex
]),
"v"
(
p_b_thread
[
bindex
]),
"0"
(
p_c_thread
[
cindex
]));
#else // this does general accumulation defined by f_accum
f_accum
(
p_c_thread
[
cindex
],
p_a_thread
[
aindex
]
*
p_b_thread
[
bindex
]);
f_accum
(
p_c_thread
[
cindex
],
p_a_thread
[
aindex
]
*
p_b_thread
[
bindex
]);
#endif
}
}
}
}
}
}
...
...
src/include/threadwise_nd_tensor_op.hip.hpp
deleted
100644 → 0
View file @
6166233e
This diff is collapsed.
Click to expand it.
Prev
1
2
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment