Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
977fc1ed
Commit
977fc1ed
authored
Apr 11, 2019
by
Jing Zhang
Browse files
clean
parent
e6230689
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
92 additions
and
92 deletions
+92
-92
driver/driver.hip.cpp
driver/driver.hip.cpp
+2
-2
src/include/amd_inline_asm.hip.hpp
src/include/amd_inline_asm.hip.hpp
+90
-90
No files found.
driver/driver.hip.cpp
View file @
977fc1ed
...
@@ -610,10 +610,10 @@ int main(int argc, char* argv[])
...
@@ -610,10 +610,10 @@ int main(int argc, char* argv[])
#elif 1
#elif 1
// 1x1 filter, 14x14 image, C = 512
// 1x1 filter, 14x14 image, C = 512
constexpr
index_t
N
=
128
;
constexpr
index_t
N
=
128
;
constexpr
index_t
C
=
12
8
;
constexpr
index_t
C
=
5
12
;
constexpr
index_t
HI
=
14
;
constexpr
index_t
HI
=
14
;
constexpr
index_t
WI
=
14
;
constexpr
index_t
WI
=
14
;
constexpr
index_t
K
=
12
8
;
constexpr
index_t
K
=
5
12
;
constexpr
index_t
Y
=
1
;
constexpr
index_t
Y
=
1
;
constexpr
index_t
X
=
1
;
constexpr
index_t
X
=
1
;
...
...
src/include/amd_inline_asm.hip.hpp
View file @
977fc1ed
...
@@ -13,7 +13,7 @@ extern "C" __attribute__((address_space(3))) void* __to_local(void* p)[[hc]];
...
@@ -13,7 +13,7 @@ extern "C" __attribute__((address_space(3))) void* __to_local(void* p)[[hc]];
#define data4_t vector_type<float, 4>::MemoryType
#define data4_t vector_type<float, 4>::MemoryType
#define data_t float
#define data_t float
template
<
unsigned
cnt
>
template
<
unsigned
cnt
>
inline
__device__
void
vmcnt
()
inline
__device__
void
vmcnt
()
{
{
#if !NO_VM_WAIT
#if !NO_VM_WAIT
...
@@ -95,8 +95,8 @@ inline __device__ void lgkmcnt(index_t cnt)
...
@@ -95,8 +95,8 @@ inline __device__ void lgkmcnt(index_t cnt)
}
}
inline
__device__
void
outerProduct1x4
(
const
data_t
&
a
,
inline
__device__
void
outerProduct1x4
(
const
data_t
&
a
,
const
data4_t
&
b
,
const
data4_t
&
b
,
data4_t
&
c
)
data4_t
&
c
)
{
{
#if 0
#if 0
asm volatile(
asm volatile(
...
@@ -111,8 +111,8 @@ inline __device__ void outerProduct1x4(const data_t& a,
...
@@ -111,8 +111,8 @@ inline __device__ void outerProduct1x4(const data_t& a,
"v"(a.x),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
"v"(a.x),"v"(b.x),"v"(b.y),"v"(b.z),"v"(b.w)
);
);
#else
#else
//hijack here due to a compiler issue that cannot perform proper register
//hijack here due to a compiler issue that cannot perform proper register
//mapping for float4 c
//mapping for float4 c
data_t
*
c_p
=
(
data_t
*
)
&
c
;
data_t
*
c_p
=
(
data_t
*
)
&
c
;
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
v_mac_f32 %0, %4, %5
\n
\
v_mac_f32 %0, %4, %5
\n
\
...
@@ -120,25 +120,25 @@ inline __device__ void outerProduct1x4(const data_t& a,
...
@@ -120,25 +120,25 @@ inline __device__ void outerProduct1x4(const data_t& a,
v_mac_f32 %2, %4, %7
\n
\
v_mac_f32 %2, %4, %7
\n
\
v_mac_f32 %3, %4, %8
\n
\
v_mac_f32 %3, %4, %8
\n
\
"
"
:
"=v"
(
c
[
0
]),
"=v"
(
c
[
1
]),
"=v"
(
c
[
2
]),
"=v"
(
c
[
3
])
:
"=v"
(
c
[
0
]),
"=v"
(
c
[
1
]),
"=v"
(
c
[
2
]),
"=v"
(
c
[
3
])
:
"v"
(
a
),
:
"v"
(
a
),
"v"
(
b
.
x
),
"v"
(
b
.
x
),
"v"
(
b
.
y
),
"v"
(
b
.
y
),
"v"
(
b
.
z
),
"v"
(
b
.
z
),
"v"
(
b
.
w
),
"v"
(
b
.
w
),
"0"
(
c
[
0
]),
"0"
(
c
[
0
]),
"1"
(
c
[
1
]),
"1"
(
c
[
1
]),
"2"
(
c
[
2
]),
"2"
(
c
[
2
]),
"3"
(
c
[
3
]));
"3"
(
c
[
3
]));
#endif
#endif
}
}
inline
__device__
void
outerProduct4x4
(
const
data4_t
&
a
,
inline
__device__
void
outerProduct4x4
(
const
data4_t
&
a
,
const
data4_t
&
b
,
const
data4_t
&
b
,
data4_t
&
c0
,
data4_t
&
c0
,
data4_t
&
c1
,
data4_t
&
c1
,
data4_t
&
c2
,
data4_t
&
c2
,
data4_t
&
c3
)
data4_t
&
c3
)
{
{
outerProduct1x4
(
a
.
x
,
b
,
c0
);
outerProduct1x4
(
a
.
x
,
b
,
c0
);
outerProduct1x4
(
a
.
y
,
b
,
c1
);
outerProduct1x4
(
a
.
y
,
b
,
c1
);
...
@@ -147,8 +147,8 @@ inline __device__ void outerProduct4x4(const data4_t& a,
...
@@ -147,8 +147,8 @@ inline __device__ void outerProduct4x4(const data4_t& a,
}
}
inline
__device__
void
outerProduct8x8
(
const
data4_t
*
a
,
inline
__device__
void
outerProduct8x8
(
const
data4_t
*
a
,
const
data4_t
*
b
,
const
data4_t
*
b
,
data4_t
*
c
)
data4_t
*
c
)
{
{
outerProduct4x4
(
a
[
0
],
b
[
0
],
c
[
0
],
c
[
2
],
c
[
4
],
c
[
6
]);
outerProduct4x4
(
a
[
0
],
b
[
0
],
c
[
0
],
c
[
2
],
c
[
4
],
c
[
6
]);
outerProduct4x4
(
a
[
0
],
b
[
1
],
c
[
1
],
c
[
3
],
c
[
5
],
c
[
7
]);
outerProduct4x4
(
a
[
0
],
b
[
1
],
c
[
1
],
c
[
3
],
c
[
5
],
c
[
7
]);
...
@@ -164,216 +164,216 @@ inline __device__ void ds_read_b128(data4_t& r, void* lds, index_t offset = 0)
...
@@ -164,216 +164,216 @@ inline __device__ void ds_read_b128(data4_t& r, void* lds, index_t offset = 0)
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:0
\n
\
ds_read_b128 %0, %1 offset:0
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
128
)
else
if
(
offset
==
128
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:128
\n
\
ds_read_b128 %0, %1 offset:128
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
256
)
else
if
(
offset
==
256
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:256
\n
\
ds_read_b128 %0, %1 offset:256
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
384
)
else
if
(
offset
==
384
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:384
\n
\
ds_read_b128 %0, %1 offset:384
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
512
)
else
if
(
offset
==
512
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:512
\n
\
ds_read_b128 %0, %1 offset:512
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
640
)
else
if
(
offset
==
640
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:640
\n
\
ds_read_b128 %0, %1 offset:640
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
768
)
else
if
(
offset
==
768
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:768
\n
\
ds_read_b128 %0, %1 offset:768
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
896
)
else
if
(
offset
==
896
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:896
\n
\
ds_read_b128 %0, %1 offset:896
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
1024
)
else
if
(
offset
==
1024
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:1024
\n
\
ds_read_b128 %0, %1 offset:1024
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
1152
)
else
if
(
offset
==
1152
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:1152
\n
\
ds_read_b128 %0, %1 offset:1152
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
1280
)
else
if
(
offset
==
1280
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:1280
\n
\
ds_read_b128 %0, %1 offset:1280
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
1408
)
else
if
(
offset
==
1408
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:1408
\n
\
ds_read_b128 %0, %1 offset:1408
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
1536
)
else
if
(
offset
==
1536
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:1536
\n
\
ds_read_b128 %0, %1 offset:1536
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
1664
)
else
if
(
offset
==
1664
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:1664
\n
\
ds_read_b128 %0, %1 offset:1664
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
1792
)
else
if
(
offset
==
1792
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:1792
\n
\
ds_read_b128 %0, %1 offset:1792
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
1920
)
else
if
(
offset
==
1920
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:1920
\n
\
ds_read_b128 %0, %1 offset:1920
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
2048
)
else
if
(
offset
==
2048
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:2048
\n
\
ds_read_b128 %0, %1 offset:2048
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
2176
)
else
if
(
offset
==
2176
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:2176
\n
\
ds_read_b128 %0, %1 offset:2176
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
2304
)
else
if
(
offset
==
2304
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:2304
\n
\
ds_read_b128 %0, %1 offset:2304
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
2560
)
else
if
(
offset
==
2560
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:2560
\n
\
ds_read_b128 %0, %1 offset:2560
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
2816
)
else
if
(
offset
==
2816
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:2816
\n
\
ds_read_b128 %0, %1 offset:2816
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
3072
)
else
if
(
offset
==
3072
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:3072
\n
\
ds_read_b128 %0, %1 offset:3072
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
3328
)
else
if
(
offset
==
3328
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:3328
\n
\
ds_read_b128 %0, %1 offset:3328
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
3584
)
else
if
(
offset
==
3584
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:3584
\n
\
ds_read_b128 %0, %1 offset:3584
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
3840
)
else
if
(
offset
==
3840
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:3840
\n
\
ds_read_b128 %0, %1 offset:3840
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
4096
)
else
if
(
offset
==
4096
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:4096
\n
\
ds_read_b128 %0, %1 offset:4096
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
if
(
offset
==
4352
)
else
if
(
offset
==
4352
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_read_b128 %0, %1 offset:4352
\n
\
ds_read_b128 %0, %1 offset:4352
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
__to_local
(
lds
)));
:
"v"
(
__to_local
(
lds
)));
}
}
else
else
{
{
...
@@ -383,8 +383,8 @@ inline __device__ void ds_read_b128(data4_t& r, void* lds, index_t offset = 0)
...
@@ -383,8 +383,8 @@ inline __device__ void ds_read_b128(data4_t& r, void* lds, index_t offset = 0)
}
}
inline
__device__
void
global_store
(
data4_t
&
r
,
inline
__device__
void
global_store
(
data4_t
&
r
,
const
void
*
vptr
,
const
void
*
vptr
,
const
void
*
sprt
=
0
)
const
void
*
sprt
=
0
)
{
{
#if !NO_GLB_READ
#if !NO_GLB_READ
if
(
sprt
==
0
)
if
(
sprt
==
0
)
...
@@ -407,8 +407,8 @@ inline __device__ void global_store(data4_t& r,
...
@@ -407,8 +407,8 @@ inline __device__ void global_store(data4_t& r,
inline
__device__
void
global_load
(
data4_t
&
r
,
inline
__device__
void
global_load
(
data4_t
&
r
,
const
void
*
vptr
,
const
void
*
vptr
,
const
void
*
sprt
=
0
)
const
void
*
sprt
=
0
)
{
{
#if !NO_GLB_READ
#if !NO_GLB_READ
if
(
sprt
==
0
)
if
(
sprt
==
0
)
...
@@ -421,26 +421,26 @@ inline __device__ void global_load(data4_t& r,
...
@@ -421,26 +421,26 @@ inline __device__ void global_load(data4_t& r,
}
}
else
else
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
global_load_dwordx4 %0, %1, %2
\n
\
global_load_dwordx4 %0, %1, %2
\n
\
"
"
:
"=v"
(
r
)
:
"=v"
(
r
)
:
"v"
(
vptr
),
"s"
(
sprt
));
:
"v"
(
vptr
),
"s"
(
sprt
));
}
}
#endif
#endif
}
}
inline
__device__
void
inline
__device__
void
ds_write_b128
(
const
data4_t
&
r
,
void
*
lds
,
index_t
offset
=
0
)
ds_write_b128
(
const
data4_t
&
r
,
void
*
lds
,
index_t
offset
=
0
)
{
{
#if !NO_DS_WRITE
#if !NO_DS_WRITE
if
(
offset
==
0
)
if
(
offset
==
0
)
{
{
asm
volatile
(
"
\n
\
asm
volatile
(
"
\n
\
ds_write_b128 %0, %1
\n
\
ds_write_b128 %0, %1
\n
\
"
"
:
:
:
"v"
(
__to_local
(
lds
)),
"v"
(
r
));
:
"v"
(
__to_local
(
lds
)),
"v"
(
r
));
}
}
else
else
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment