Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
78be5944
Commit
78be5944
authored
Jun 29, 2022
by
Paul
Browse files
Imprve indexing and syncthreads
parent
ecfb0b72
Changes
6
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
66 additions
and
19 deletions
+66
-19
src/targets/gpu/compile_hip.cpp
src/targets/gpu/compile_hip.cpp
+3
-0
src/targets/gpu/kernels/include/migraphx/kernels/array.hpp
src/targets/gpu/kernels/include/migraphx/kernels/array.hpp
+10
-0
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp
...rgets/gpu/kernels/include/migraphx/kernels/functional.hpp
+18
-0
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
+4
-4
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp
+2
-1
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
+29
-14
No files found.
src/targets/gpu/compile_hip.cpp
View file @
78be5944
...
@@ -43,6 +43,7 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -43,6 +43,7 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_DEBUG
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_DEBUG
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_DEBUG_SYM
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_OPTIMIZE
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_OPTIMIZE
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_DUMP_ASM
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_DUMP_ASM
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_DUMP_SRC
);
MIGRAPHX_DECLARE_ENV_VAR
(
MIGRAPHX_GPU_DUMP_SRC
);
...
@@ -227,6 +228,8 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
...
@@ -227,6 +228,8 @@ compile_hip_src(const std::vector<src_file>& srcs, std::string params, const std
if
(
params
.
find
(
"-std="
)
==
std
::
string
::
npos
)
if
(
params
.
find
(
"-std="
)
==
std
::
string
::
npos
)
params
+=
" --std=c++17"
;
params
+=
" --std=c++17"
;
params
+=
" -fno-gpu-rdc"
;
params
+=
" -fno-gpu-rdc"
;
if
(
enabled
(
MIGRAPHX_GPU_DEBUG_SYM
{}))
params
+=
" -g"
;
params
+=
" -c"
;
params
+=
" -c"
;
if
(
is_hcc_compiler
())
if
(
is_hcc_compiler
())
{
{
...
...
src/targets/gpu/kernels/include/migraphx/kernels/array.hpp
View file @
78be5944
...
@@ -76,6 +76,7 @@ namespace migraphx {
...
@@ -76,6 +76,7 @@ namespace migraphx {
template
<
class
T
,
index_int
N
>
template
<
class
T
,
index_int
N
>
struct
array
struct
array
{
{
using
type
=
T
;
T
d
[
N
];
T
d
[
N
];
constexpr
T
&
operator
[](
index_int
i
)
constexpr
T
&
operator
[](
index_int
i
)
{
{
...
@@ -208,6 +209,15 @@ struct integral_const_array : array<T, sizeof...(Xs)>
...
@@ -208,6 +209,15 @@ struct integral_const_array : array<T, sizeof...(Xs)>
MIGRAPHX_DEVICE_CONSTEXPR
integral_const_array
()
:
base_array
({
Xs
...})
{}
MIGRAPHX_DEVICE_CONSTEXPR
integral_const_array
()
:
base_array
({
Xs
...})
{}
};
};
template
<
class
F
>
constexpr
auto
return_const_array
(
F
f
)
{
constexpr
const
auto
a
=
f
();
return
sequence
(
a
.
size
(),
[
=
](
auto
...
is
)
{
return
integral_const_array
<
typename
decltype
(
a
)
::
type
,
a
[
is
]...
>
{};
});
}
template
<
class
T
,
T
...
Xs
,
class
F
>
template
<
class
T
,
T
...
Xs
,
class
F
>
constexpr
auto
transform
(
integral_const_array
<
T
,
Xs
...
>
,
F
f
)
constexpr
auto
transform
(
integral_const_array
<
T
,
Xs
...
>
,
F
f
)
{
{
...
...
src/targets/gpu/kernels/include/migraphx/kernels/functional.hpp
View file @
78be5944
...
@@ -111,6 +111,12 @@ struct gens<1> : seq<0>
...
@@ -111,6 +111,12 @@ struct gens<1> : seq<0>
{
{
};
};
template
<
class
F
,
index_int
...
Ns
>
constexpr
void
repeat_c_impl
(
F
f
,
seq
<
Ns
...
>
)
{
swallow
{(
f
(
std
::
integral_constant
<
std
::
size_t
,
Ns
>
{}),
0
)...};
}
template
<
class
F
,
index_int
...
Ns
>
template
<
class
F
,
index_int
...
Ns
>
constexpr
auto
sequence_c_impl
(
F
&&
f
,
seq
<
Ns
...
>
)
constexpr
auto
sequence_c_impl
(
F
&&
f
,
seq
<
Ns
...
>
)
{
{
...
@@ -143,6 +149,18 @@ constexpr auto sequence(IntegerConstant ic, F&& f)
...
@@ -143,6 +149,18 @@ constexpr auto sequence(IntegerConstant ic, F&& f)
return
sequence_c
<
ic
>
(
f
);
return
sequence_c
<
ic
>
(
f
);
}
}
template
<
std
::
size_t
N
,
class
F
>
constexpr
void
repeat_c
(
F
f
)
{
detail
::
repeat_c_impl
(
f
,
detail
::
gens
<
N
>
{});
}
template
<
class
IntegerConstant
,
class
F
>
constexpr
auto
repeat
(
IntegerConstant
ic
,
F
&&
f
)
{
return
repeat_c
<
ic
>
(
f
);
}
template
<
class
F
,
class
G
>
template
<
class
F
,
class
G
>
constexpr
auto
by
(
F
f
,
G
g
)
constexpr
auto
by
(
F
f
,
G
g
)
{
{
...
...
src/targets/gpu/kernels/include/migraphx/kernels/index.hpp
View file @
78be5944
...
@@ -54,8 +54,8 @@ struct index
...
@@ -54,8 +54,8 @@ struct index
}
}
#endif
#endif
template
<
class
F
>
template
<
class
N
,
class
F
>
__device__
void
global_stride
(
index_int
n
,
F
f
)
const
__device__
void
global_stride
(
N
n
,
F
f
)
const
{
{
const
auto
stride
=
nglobal
();
const
auto
stride
=
nglobal
();
for
(
index_int
i
=
global
;
i
<
n
;
i
+=
stride
)
for
(
index_int
i
=
global
;
i
<
n
;
i
+=
stride
)
...
@@ -64,8 +64,8 @@ struct index
...
@@ -64,8 +64,8 @@ struct index
}
}
}
}
template
<
class
F
>
template
<
class
N
,
class
F
>
__device__
void
local_stride
(
index_int
n
,
F
f
)
const
__device__
void
local_stride
(
N
n
,
F
f
)
const
{
{
const
auto
stride
=
nlocal
();
const
auto
stride
=
nlocal
();
for
(
index_int
i
=
local
;
i
<
n
;
i
+=
stride
)
for
(
index_int
i
=
local
;
i
<
n
;
i
+=
stride
)
...
...
src/targets/gpu/kernels/include/migraphx/kernels/preload.hpp
View file @
78be5944
...
@@ -186,7 +186,8 @@ __device__ auto auto_preload(index idx)
...
@@ -186,7 +186,8 @@ __device__ auto auto_preload(index idx)
{
{
return
make_transform
([
=
](
auto
f
,
auto
...
xs
)
{
return
make_transform
([
=
](
auto
f
,
auto
...
xs
)
{
auto
invoke
=
[
=
](
auto
...
ys
)
{
auto
invoke
=
[
=
](
auto
...
ys
)
{
__syncthreads
();
if
constexpr
((
Bs
or
...))
__syncthreads
();
f
(
ys
...);
f
(
ys
...);
};
};
join
(
invoke
,
preload_copy
<
Bs
>
(
idx
,
xs
)...);
join
(
invoke
,
preload_copy
<
Bs
>
(
idx
,
xs
)...);
...
...
src/targets/gpu/kernels/include/migraphx/kernels/shape.hpp
View file @
78be5944
...
@@ -74,6 +74,20 @@ struct shape
...
@@ -74,6 +74,20 @@ struct shape
constexpr
auto
standard
()
const
{
return
packed
()
and
not
transposed
();
}
constexpr
auto
standard
()
const
{
return
packed
()
and
not
transposed
();
}
static
constexpr
auto
compute_standard_strides
()
{
return
return_const_array
([]
{
index_array
result
{};
index_int
s
=
1
;
for
(
diff_int
is
=
result
.
size
()
-
1
;
is
>=
0
;
is
--
)
{
result
[
is
]
=
s
;
s
*=
Lens
{}[
is
];
}
return
result
;
});
}
constexpr
index_int
index
(
index_array
x
)
const
{
return
x
.
dot
(
strides
);
}
constexpr
index_int
index
(
index_array
x
)
const
{
return
x
.
dot
(
strides
);
}
constexpr
index_int
index
(
std
::
initializer_list
<
index_int
>
x
)
const
constexpr
index_int
index
(
std
::
initializer_list
<
index_int
>
x
)
const
...
@@ -86,8 +100,7 @@ struct shape
...
@@ -86,8 +100,7 @@ struct shape
constexpr
index_int
index
(
index_int
i
)
const
constexpr
index_int
index
(
index_int
i
)
const
{
{
if
(
this
->
standard
())
if
constexpr
(
shape
{}.
standard
())
{
MIGRAPHX_ASSERT
(
i
==
compute_index
(
i
));
MIGRAPHX_ASSERT
(
i
==
compute_index
(
i
));
return
i
;
return
i
;
}
}
...
@@ -99,19 +112,21 @@ struct shape
...
@@ -99,19 +112,21 @@ struct shape
constexpr
index_int
compute_index
(
index_int
i
)
const
constexpr
index_int
compute_index
(
index_int
i
)
const
{
{
const
auto
rank
=
this
->
lens
.
size
();
const
expr
auto
sstrides
=
compute_standard_strides
();
index_int
s
=
1
;
constexpr
auto
rank
=
Lens
{}.
size
()
;
index_int
result
=
0
;
index_int
result
=
0
;
for
(
index_int
j
=
0
;
j
<
rank
;
j
++
)
repeat
(
rank
,
[
&
](
auto
j
)
{
{
constexpr
auto
k
=
rank
-
j
-
1
;
const
index_int
k
=
rank
-
j
-
1
;
constexpr
auto
stride
=
Strides
{}[
k
];
const
index_int
stride
=
this
->
strides
[
k
];
constexpr
auto
len
=
Lens
{}[
k
];
const
index_int
len
=
this
->
lens
[
k
];
if
constexpr
(
stride
!=
0
)
const
index_int
slen
=
s
*
len
;
{
const
index_int
idx
=
(
i
%
slen
)
/
s
;
constexpr
auto
s
=
sstrides
[
k
];
result
+=
stride
*
idx
;
constexpr
auto
slen
=
s
*
len
;
s
=
slen
;
auto
idx
=
(
i
%
slen
)
/
s
;
}
result
+=
stride
*
idx
;
}
});
return
result
;
return
result
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment