Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
18d40f68
"vscode:/vscode.git/clone" did not exist on "b3c6d6768411e9330b098315592350ff2c8f0842"
Commit
18d40f68
authored
Mar 28, 2019
by
Jing Zhang
Browse files
inline ds_read
parent
60d30627
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
25 additions
and
2 deletions
+25
-2
src/include/threadwise_gemm.hip.hpp
src/include/threadwise_gemm.hip.hpp
+25
-2
No files found.
src/include/threadwise_gemm.hip.hpp
View file @
18d40f68
#pragma once
#pragma once
extern
"C"
__attribute__
((
address_space
(
3
)))
void
*
__to_local
(
void
*
p
)
[[
hc
]];
template
<
class
Float
,
class
SrcMatrix
,
class
DstMatrix
,
unsigned
NRow
,
unsigned
NCol
>
template
<
class
Float
,
class
SrcMatrix
,
class
DstMatrix
,
unsigned
NRow
,
unsigned
NCol
>
__device__
void
threadwise_matrix_copy
(
SrcMatrix
,
__device__
void
threadwise_matrix_copy
(
SrcMatrix
,
const
Float
*
__restrict__
p_src
,
const
Float
*
__restrict__
p_src
,
...
@@ -12,13 +14,34 @@ __device__ void threadwise_matrix_copy(SrcMatrix,
...
@@ -12,13 +14,34 @@ __device__ void threadwise_matrix_copy(SrcMatrix,
for
(
unsigned
i
=
0
;
i
<
NRow
;
++
i
)
for
(
unsigned
i
=
0
;
i
<
NRow
;
++
i
)
{
{
#if 1
assert
(
NCol
==
8
);
{
const
unsigned
src_index
=
src_mtx
.
Get1dIndex
(
i
,
0
);
const
unsigned
dst_index
=
dst_mtx
.
Get1dIndex
(
i
,
0
);
const
float4
*
loc
=
(
const
float4
*
)(
p_src
+
src_index
);
float4
*
reg
=
(
float4
*
)(
p_dst
+
dst_index
);
//reg[0] = loc[0];
//reg[1] = loc[1];
asm
volatile
(
"
\n
\
ds_read2_b64 %0, %2 offset1:1
\n
\
ds_read2_b64 %1, %2 offset0:16 offset1:17
\n
\
s_waitcnt lgkmcnt(0)"
:
"=v"
(
reg
[
0
]),
"=v"
(
reg
[
1
])
:
"v"
(
__to_local
((
void
*
)
&
p_src
[
src_index
])));
}
#else
for
(
unsigned
j
=
0
;
j
<
NCol
;
++
j
)
for
(
unsigned
j
=
0
;
j
<
NCol
;
++
j
)
{
{
const
unsigned
src_index
=
src_mtx
.
Get1dIndex
(
i
,
j
);
const
unsigned
src_index
=
src_mtx
.
Get1dIndex
(
i
,
j
);
const
unsigned
dst_index
=
dst_mtx
.
Get1dIndex
(
i
,
j
);
const
unsigned
dst_index
=
dst_mtx
.
Get1dIndex
(
i
,
j
);
p_dst
[
dst_index
]
=
p_src
[
src_index
];
//p_dst[dst_index] = p_src[src_index];
asm
volatile
(
"ds_read_b32 %0, %1
\n
s_waitcnt lgkmcnt(0)"
:
"=v"
(
p_dst
[
dst_index
])
:
"v"
(
__to_local
((
void
*
)
&
p_src
[
src_index
])));
}
}
#endif
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment