Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
be5f3539
Commit
be5f3539
authored
Jul 09, 2019
by
Shucai Xiao
Browse files
merge develop branch changes
parents
7e3bdc34
ebfe9735
Changes
103
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
1134 additions
and
447 deletions
+1134
-447
src/targets/gpu/argmax.cpp
src/targets/gpu/argmax.cpp
+23
-0
src/targets/gpu/argmin.cpp
src/targets/gpu/argmin.cpp
+23
-0
src/targets/gpu/device/argmax.cpp
src/targets/gpu/device/argmax.cpp
+23
-0
src/targets/gpu/device/argmin.cpp
src/targets/gpu/device/argmin.cpp
+23
-0
src/targets/gpu/device/concat.cpp
src/targets/gpu/device/concat.cpp
+12
-12
src/targets/gpu/device/erf.cpp
src/targets/gpu/device/erf.cpp
+18
-0
src/targets/gpu/device/gather.cpp
src/targets/gpu/device/gather.cpp
+18
-23
src/targets/gpu/device/include/migraphx/gpu/device/array.hpp
src/targets/gpu/device/include/migraphx/gpu/device/array.hpp
+68
-0
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp
...targets/gpu/device/include/migraphx/gpu/device/launch.hpp
+42
-10
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
+95
-271
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
...targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
+248
-0
src/targets/gpu/device/include/migraphx/gpu/device/shape.hpp
src/targets/gpu/device/include/migraphx/gpu/device/shape.hpp
+105
-0
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
...targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
+13
-56
src/targets/gpu/device/include/migraphx/gpu/device/tensor_view.hpp
...ts/gpu/device/include/migraphx/gpu/device/tensor_view.hpp
+59
-0
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
+49
-6
src/targets/gpu/device/include/migraphx/gpu/device/vector.hpp
...targets/gpu/device/include/migraphx/gpu/device/vector.hpp
+76
-0
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp
+170
-0
src/targets/gpu/device/logsoftmax.cpp
src/targets/gpu/device/logsoftmax.cpp
+32
-52
src/targets/gpu/device/pad.cpp
src/targets/gpu/device/pad.cpp
+20
-17
src/targets/gpu/device/reduce_sum.cpp
src/targets/gpu/device/reduce_sum.cpp
+17
-0
No files found.
src/targets/gpu/argmax.cpp
0 → 100644
View file @
be5f3539
#include <migraphx/gpu/argmax.hpp>
#include <migraphx/gpu/device/argmax.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_argmax
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
standard
();
return
op
.
compute_shape
({
inputs
.
at
(
0
)});
}
argument
hip_argmax
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
argmax
(
ctx
.
get_stream
().
get
(),
args
.
back
(),
args
.
front
(),
op
.
axis
);
return
args
.
back
();
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/argmin.cpp
0 → 100644
View file @
be5f3539
#include <migraphx/gpu/argmin.hpp>
#include <migraphx/gpu/device/argmin.hpp>
#include <migraphx/gpu/context.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
shape
hip_argmin
::
compute_shape
(
const
std
::
vector
<
shape
>&
inputs
)
const
{
check_shapes
{
inputs
,
*
this
}.
has
(
2
).
standard
();
return
op
.
compute_shape
({
inputs
.
at
(
0
)});
}
argument
hip_argmin
::
compute
(
context
&
ctx
,
const
shape
&
,
const
std
::
vector
<
argument
>&
args
)
const
{
device
::
argmin
(
ctx
.
get_stream
().
get
(),
args
.
back
(),
args
.
front
(),
op
.
axis
);
return
args
.
back
();
}
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/argmax.cpp
0 → 100644
View file @
be5f3539
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/argmax.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/arg_op.hpp>
#include <migraphx/gpu/hip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
argmax
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int64_t
axis
)
{
arg_op
(
argmax_op
{},
stream
,
result
,
arg
,
axis
);
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/argmin.cpp
0 → 100644
View file @
be5f3539
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/argmin.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/arg_op.hpp>
#include <migraphx/gpu/hip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
argmin
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int64_t
axis
)
{
arg_op
(
argmin_op
{},
stream
,
result
,
arg
,
axis
);
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/concat.cpp
View file @
be5f3539
...
@@ -10,22 +10,22 @@ namespace gpu {
...
@@ -10,22 +10,22 @@ namespace gpu {
namespace
device
{
namespace
device
{
argument
concat
(
hipStream_t
stream
,
argument
concat
(
hipStream_t
stream
,
const
migraphx
::
shape
&
output_shape
,
const
migraphx
::
shape
&
,
std
::
vector
<
migraphx
::
argument
>
args
,
std
::
vector
<
migraphx
::
argument
>
args
,
std
::
vector
<
std
::
size_t
>
offsets
)
std
::
vector
<
std
::
size_t
>
offsets
)
{
{
for
(
std
::
size_t
l
=
0
;
l
<
args
.
size
()
-
1
;
l
++
)
auto
ninputs
=
args
.
size
()
-
1
;
for
(
std
::
size_t
j
=
0
;
j
<
ninputs
;
j
++
)
{
{
auto
argl
=
args
[
l
];
auto
&&
arg
=
args
[
j
];
std
::
size_t
nelements
=
argl
.
get_shape
().
elements
();
std
::
size_t
nelements
=
arg
.
get_shape
().
elements
();
visit_all
(
args
.
back
(),
argl
)([
&
](
auto
output
,
auto
input
)
{
auto
offset
=
offsets
[
j
];
visit_tensor_size
(
output_shape
.
lens
().
size
(),
[
&
](
auto
ndim
)
{
shape
arg_shape
{
arg
.
get_shape
().
type
(),
arg
.
get_shape
().
lens
()};
auto
*
outptr
=
output
.
data
()
+
offsets
[
l
];
hip_visit_all
(
args
.
back
(),
arg
,
arg_shape
)([
&
](
auto
output
,
auto
input
,
auto
input_shape
)
{
const
auto
*
inptr
=
input
.
data
();
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
hip_tensor_descriptor
<
ndim
>
desc_input
(
input
.
get_shape
());
auto
input_idx
=
input_shape
.
multi
(
i
);
hip_tensor_descriptor
<
ndim
>
desc_output
(
output
.
get_shape
());
auto
idx
=
output
.
get_shape
().
index
(
input_idx
);
gs_launch
(
stream
,
nelements
)(
output
.
data
()[
idx
+
offset
]
=
input
[
input_idx
];
[
=
](
auto
i
)
{
outptr
[
desc_output
.
linear
(
desc_input
.
multi
(
i
))]
=
inptr
[
i
];
});
});
});
});
});
}
}
...
...
src/targets/gpu/device/erf.cpp
0 → 100644
View file @
be5f3539
#include <migraphx/gpu/device/erf.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
erf
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
nary
(
stream
,
result
,
arg
)([](
auto
x
)
{
return
::
erf
(
to_hip_type
(
x
));
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/gather.cpp
View file @
be5f3539
...
@@ -11,35 +11,30 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -11,35 +11,30 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
argument
gather
(
hipStream_t
stream
,
argument
gather
(
hipStream_t
stream
,
argument
result
,
argument
arg1
,
argument
arg2
,
int
axis
)
const
migraphx
::
shape
&
output_shape
,
std
::
vector
<
migraphx
::
argument
>
args
,
int
axis
)
{
{
auto
axis_index
=
(
axis
<
0
)
?
(
axis
+
args
[
0
].
get_shape
().
lens
().
size
())
:
axis
;
auto
axis_index
=
(
axis
<
0
)
?
(
axis
+
arg1
.
get_shape
().
lens
().
size
())
:
axis
;
visit_all
(
args
.
back
(),
args
[
0
])([
&
](
auto
output
,
auto
input
)
{
auto
&
input_shape
=
arg1
.
get_shape
();
std
::
size_t
nelements
=
output_shape
.
elements
();
auto
lens
=
input_shape
.
lens
();
args
[
1
].
visit
([
&
](
auto
indices
)
{
lens
[
axis_index
]
=
arg2
.
get_shape
().
elements
();
const
auto
*
indices_ptr
=
device_cast
(
indices
.
data
());
shape
out_comp_shape
{
result
.
get_shape
().
type
(),
lens
};
auto
*
out_ptr
=
device_cast
(
output
.
data
());
std
::
size_t
nelements
=
result
.
get_shape
().
elements
();
const
auto
*
in_ptr
=
device_cast
(
input
.
data
());
auto
&
input_shape
=
args
[
0
].
get_shape
();
visit_all
(
result
,
arg1
)([
&
](
auto
output
,
auto
input_v
)
{
auto
lens
=
input_shape
.
lens
();
hip_visit_views
(
input_v
,
out_comp_shape
)([
&
](
auto
input
,
auto
out_comp
)
{
lens
[
axis_index
]
=
args
[
1
].
get_shape
().
elements
();
arg2
.
visit
([
&
](
auto
indices
)
{
migraphx
::
shape
out_comp_shape
{
output_shape
.
type
(),
lens
};
const
auto
*
indices_ptr
=
device_cast
(
indices
.
data
());
visit_tensor_size
(
out_comp_shape
.
lens
().
size
(),
[
&
](
auto
n_out_dim
)
{
auto
*
output_ptr
=
device_cast
(
output
.
data
());
hip_tensor_descriptor
<
n_out_dim
>
desc_input
(
input_shape
);
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
hip_tensor_descriptor
<
n_out_dim
>
desc_output
(
out_comp_shape
);
auto
idx
=
out_comp
.
multi
(
i
);
gs_launch
(
stream
,
nelements
)([
=
](
auto
ii
)
{
idx
[
axis_index
]
=
indices_ptr
[
idx
[
axis_index
]];
auto
in_idx
=
desc_output
.
multi
(
ii
);
output_ptr
[
i
]
=
input
[
idx
];
in_idx
[
axis_index
]
=
indices_ptr
[
in_idx
[
axis_index
]];
out_ptr
[
ii
]
=
in_ptr
[
desc_input
.
linear
(
in_idx
)];
});
});
});
});
});
});
});
});
return
args
.
back
()
;
return
result
;
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/include/migraphx/gpu/device/array.hpp
0 → 100644
View file @
be5f3539
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_ARRAY_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_ARRAY_HPP
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
template
<
class
T
,
std
::
size_t
N
>
struct
hip_array
{
T
d
[
N
];
MIGRAPHX_DEVICE_CONSTEXPR
T
&
operator
[](
std
::
size_t
i
)
{
return
d
[
i
];
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
&
operator
[](
std
::
size_t
i
)
const
{
return
d
[
i
];
}
MIGRAPHX_DEVICE_CONSTEXPR
T
*
data
()
{
return
d
;
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
*
data
()
const
{
return
d
;
}
MIGRAPHX_DEVICE_CONSTEXPR
std
::
integral_constant
<
std
::
size_t
,
N
>
size
()
const
{
return
{};
}
MIGRAPHX_DEVICE_CONSTEXPR
T
*
begin
()
{
return
d
;
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
*
begin
()
const
{
return
d
;
}
MIGRAPHX_DEVICE_CONSTEXPR
T
*
end
()
{
return
d
+
size
();
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
*
end
()
const
{
return
d
+
size
();
}
MIGRAPHX_DEVICE_CONSTEXPR
T
dot
(
const
hip_array
&
x
)
const
{
T
result
=
0
;
for
(
std
::
size_t
i
=
0
;
i
<
N
;
i
++
)
result
+=
x
[
i
]
*
d
[
i
];
return
result
;
}
MIGRAPHX_DEVICE_CONSTEXPR
T
product
()
const
{
T
result
=
1
;
for
(
std
::
size_t
i
=
0
;
i
<
N
;
i
++
)
result
*=
d
[
i
];
return
result
;
}
friend
MIGRAPHX_DEVICE_CONSTEXPR
hip_array
operator
*
(
const
hip_array
&
x
,
const
hip_array
&
y
)
{
hip_array
result
;
for
(
std
::
size_t
i
=
0
;
i
<
N
;
i
++
)
result
[
i
]
=
x
[
i
]
*
y
[
i
];
return
result
;
}
friend
MIGRAPHX_DEVICE_CONSTEXPR
hip_array
operator
+
(
const
hip_array
&
x
,
const
hip_array
&
y
)
{
hip_array
result
{};
for
(
std
::
size_t
i
=
0
;
i
<
N
;
i
++
)
result
[
i
]
=
x
[
i
]
+
y
[
i
];
return
result
;
}
};
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/device/include/migraphx/gpu/device/launch.hpp
View file @
be5f3539
...
@@ -11,9 +11,33 @@ namespace device {
...
@@ -11,9 +11,33 @@ namespace device {
struct
index
struct
index
{
{
std
::
size_t
global
;
std
::
size_t
global
=
0
;
std
::
size_t
local
;
std
::
size_t
local
=
0
;
std
::
size_t
group
;
std
::
size_t
group
=
0
;
__device__
std
::
size_t
nglobal
()
const
{
return
blockDim
.
x
*
gridDim
.
x
;
}
// NOLINT
__device__
std
::
size_t
nlocal
()
const
{
return
blockDim
.
x
;
}
// NOLINT
template
<
class
F
>
__device__
void
global_stride
(
std
::
size_t
n
,
F
f
)
const
{
const
auto
stride
=
nglobal
();
for
(
std
::
size_t
i
=
global
;
i
<
n
;
i
+=
stride
)
{
f
(
i
);
}
}
template
<
class
F
>
__device__
void
local_stride
(
std
::
size_t
n
,
F
f
)
const
{
const
auto
stride
=
nlocal
();
for
(
std
::
size_t
i
=
local
;
i
<
n
;
i
+=
stride
)
{
f
(
i
);
}
}
};
};
template
<
class
F
>
template
<
class
F
>
...
@@ -35,18 +59,26 @@ inline auto launch(hipStream_t stream, std::size_t global, std::size_t local)
...
@@ -35,18 +59,26 @@ inline auto launch(hipStream_t stream, std::size_t global, std::size_t local)
};
};
}
}
template
<
class
F
>
__host__
__device__
auto
gs_invoke
(
F
&&
f
,
std
::
size_t
i
,
index
idx
)
->
decltype
(
f
(
i
,
idx
))
{
return
f
(
i
,
idx
);
}
template
<
class
F
>
__host__
__device__
auto
gs_invoke
(
F
&&
f
,
std
::
size_t
i
,
index
)
->
decltype
(
f
(
i
))
{
return
f
(
i
);
}
inline
auto
gs_launch
(
hipStream_t
stream
,
std
::
size_t
n
,
std
::
size_t
local
=
1024
)
inline
auto
gs_launch
(
hipStream_t
stream
,
std
::
size_t
n
,
std
::
size_t
local
=
1024
)
{
{
std
::
size_t
groups
=
1
+
n
/
local
;
std
::
size_t
groups
=
(
n
+
local
-
1
)
/
local
;
std
::
size_t
nglobal
=
std
::
min
<
std
::
size_t
>
(
256
,
groups
)
*
local
;
std
::
size_t
nglobal
=
std
::
min
<
std
::
size_t
>
(
256
,
groups
)
*
local
;
return
[
=
](
auto
f
)
{
return
[
=
](
auto
f
)
{
launch
(
stream
,
nglobal
,
local
)([
=
](
auto
idx
)
{
launch
(
stream
,
nglobal
,
local
)(
for
(
size_t
i
=
idx
.
global
;
i
<
n
;
i
+=
nglobal
)
[
=
](
auto
idx
)
{
idx
.
global_stride
(
n
,
[
&
](
auto
i
)
{
gs_invoke
(
f
,
i
,
idx
);
});
});
{
f
(
i
);
}
});
};
};
}
}
...
...
src/targets/gpu/device/include/migraphx/gpu/device/nary.hpp
View file @
be5f3539
This diff is collapsed.
Click to expand it.
src/targets/gpu/device/include/migraphx/gpu/device/reduce.hpp
0 → 100644
View file @
be5f3539
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_HPP
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/visit.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
struct
sum
{
template
<
class
T
,
class
U
>
MIGRAPHX_DEVICE_CONSTEXPR
auto
operator
()(
T
x
,
U
y
)
const
{
return
x
+
y
;
}
};
struct
id
{
template
<
class
T
>
MIGRAPHX_DEVICE_CONSTEXPR
auto
operator
()(
T
x
)
const
{
return
x
;
}
};
struct
max
{
template
<
class
T
,
class
U
>
MIGRAPHX_DEVICE_CONSTEXPR
auto
operator
()(
T
x
,
U
y
)
const
{
return
x
>
y
?
x
:
y
;
}
};
struct
min
{
template
<
class
T
,
class
U
>
MIGRAPHX_DEVICE_CONSTEXPR
auto
operator
()(
T
x
,
U
y
)
const
{
return
x
<
y
?
x
:
y
;
}
};
struct
lowest
{
template
<
class
T
>
operator
T
()
const
{
return
device_cast
(
std
::
numeric_limits
<
host_type
<
T
>>::
lowest
());
}
};
struct
highest
{
template
<
class
T
>
operator
T
()
const
{
return
device_cast
(
std
::
numeric_limits
<
host_type
<
T
>>::
max
());
}
};
#ifdef MIGRAPHX_NO_DPP
template
<
std
::
size_t
N
,
class
Op
,
class
T
,
class
F
>
__device__
auto
block_reduce
(
index
idx
,
Op
op
,
T
init
,
std
::
size_t
n
,
F
f
)
{
using
type
=
decltype
(
f
(
idx
.
local
));
MIGRAPHX_DEVICE_SHARED
type
buffer
[
N
];
type
x
=
init
;
idx
.
local_stride
(
n
,
[
&
](
auto
i
)
{
x
=
op
(
x
,
f
(
i
));
});
buffer
[
idx
.
local
]
=
x
;
__syncthreads
();
for
(
std
::
size_t
s
=
1
;
s
<
idx
.
nlocal
();
s
*=
2
)
{
const
std
::
size_t
index
=
2
*
s
*
idx
.
local
;
if
(
index
+
s
<
idx
.
nlocal
())
{
buffer
[
index
]
=
op
(
buffer
[
index
],
buffer
[
index
+
s
]);
}
__syncthreads
();
}
return
buffer
[
0
];
}
#else
constexpr
unsigned
int
dpp_row_shr
(
unsigned
int
x
)
{
return
0x110u
|
x
;
}
constexpr
unsigned
int
dpp_row_bcast
(
unsigned
int
x
)
{
unsigned
int
y
=
0
;
switch
(
x
)
{
case
15
:
y
=
0x142
;
break
;
case
31
:
y
=
0x143
;
break
;
default:
throw
std
::
runtime_error
(
"Unknown bcast"
);
}
return
y
;
}
template
<
unsigned
int
DppCtrl
,
unsigned
int
RowMask
=
0xf
,
unsigned
int
BankMask
=
0xf
,
bool
BoundCtrl
=
false
,
class
T
>
__device__
T
dpp_mov
(
T
&
x
)
{
static
const
std
::
size_t
n
=
sizeof
(
T
)
<
4
?
1
:
sizeof
(
T
)
/
4
;
union
type
{
uint32_t
reg
[
n
];
T
data
;
};
type
output
{};
type
input
{};
// cppcheck-suppress unreadVariable
input
.
data
=
x
;
for
(
std
::
size_t
i
=
0
;
i
<
n
;
i
++
)
{
output
.
reg
[
i
]
=
__llvm_amdgcn_move_dpp
(
input
.
reg
[
i
],
DppCtrl
,
RowMask
,
BankMask
,
BoundCtrl
);
}
return
output
.
data
;
}
template
<
class
T
,
class
Op
>
__device__
void
dpp_reduce
(
T
&
in
,
Op
op
)
{
T
out
{};
out
=
dpp_mov
<
dpp_row_shr
(
1
)
>
(
in
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_shr
(
2
)
>
(
in
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_shr
(
4
),
0xf
,
0xe
>
(
in
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_shr
(
8
),
0xf
,
0xc
>
(
in
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_bcast
(
15
),
0xa
>
(
in
);
in
=
op
(
in
,
out
);
out
=
dpp_mov
<
dpp_row_bcast
(
31
),
0xc
>
(
in
);
in
=
op
(
in
,
out
);
}
__device__
inline
void
dpp_reduce
(
float
&
x
,
sum
)
{
#ifdef MIGRAPHX_USE_CLANG_TIDY
(
void
)
x
;
#else
__asm__
volatile
(
"s_nop 4
\n
"
"v_add_f32 %0 %0 %0 row_shr:1
\n
"
"s_nop 1
\n
"
"v_add_f32 %0 %0 %0 row_shr:2
\n
"
"s_nop 1
\n
"
"v_add_f32 %0 %0 %0 row_shr:4 bank_mask:0xe
\n
"
"s_nop 1
\n
"
"v_add_f32 %0 %0 %0 row_shr:8 bank_mask:0xc
\n
"
"s_nop 1
\n
"
"v_add_f32 %0 %0 %0 row_bcast:15 row_mask:0xa
\n
"
"s_nop 1
\n
"
"v_add_f32 %0 %0 %0 row_bcast:31 row_mask:0xc
\n
"
"s_nop 1
\n
"
:
"=v"
(
x
)
:
"0"
(
x
));
#endif
}
template
<
std
::
size_t
N
,
class
Op
,
class
T
,
class
F
>
__device__
auto
block_reduce
(
index
idx
,
Op
op
,
T
init
,
std
::
size_t
n
,
F
f
)
{
using
type
=
decltype
(
f
(
idx
.
local
));
MIGRAPHX_DEVICE_SHARED
type
buffer
[
N
/
64
];
type
x
=
init
;
idx
.
local_stride
(
n
,
[
&
](
auto
i
)
{
x
=
op
(
x
,
f
(
i
));
});
dpp_reduce
(
x
,
op
);
const
auto
ldsidx
=
idx
.
local
/
64
;
if
((
idx
.
local
%
64
)
==
63
)
{
buffer
[
ldsidx
]
=
x
;
}
__syncthreads
();
type
y
=
init
;
for
(
std
::
size_t
i
=
0
;
i
<
idx
.
nlocal
()
/
64
;
i
++
)
{
y
=
op
(
y
,
buffer
[
i
]);
}
return
y
;
}
#endif
constexpr
std
::
size_t
compute_block_size
(
std
::
size_t
n
,
std
::
size_t
max_block_size
)
{
size_t
block_size
=
64
;
while
(
block_size
<
max_block_size
and
block_size
<
n
)
block_size
*=
2
;
return
block_size
;
}
template
<
class
Op
,
class
T
,
class
Input
,
class
Output
>
void
reduce
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
Op
op
,
T
init
,
Input
read_input
,
Output
read_output
)
{
auto
&&
output_shape
=
result
.
get_shape
();
auto
&&
input_shape
=
arg
.
get_shape
();
std
::
vector
<
std
::
size_t
>
reduce_lens
;
std
::
transform
(
output_shape
.
lens
().
begin
(),
output_shape
.
lens
().
end
(),
input_shape
.
lens
().
begin
(),
std
::
back_inserter
(
reduce_lens
),
[](
auto
x
,
auto
y
)
->
std
::
size_t
{
if
(
x
==
y
)
return
1
;
else
return
y
;
});
shape
reduce_slice
{
output_shape
.
type
(),
reduce_lens
};
hip_visit_all
(
result
,
arg
,
reduce_slice
)([
&
](
auto
output
,
auto
input
,
auto
reduce_shape
)
{
auto
nelements
=
result
.
get_shape
().
elements
();
auto
relements
=
reduce_slice
.
elements
();
const
std
::
size_t
max_block_size
=
256
;
const
std
::
size_t
block_size
=
compute_block_size
(
relements
,
max_block_size
);
gs_launch
(
stream
,
nelements
*
block_size
,
block_size
)([
=
](
auto
i
,
auto
idx
)
__device__
{
const
auto
out_idx
=
i
/
block_size
;
auto
base_idx
=
output
.
get_shape
().
multi
(
out_idx
);
auto
r
=
block_reduce
<
max_block_size
>
(
idx
,
op
,
init
,
relements
,
[
&
](
auto
j
)
__device__
{
auto
reduce_idx
=
reduce_shape
.
multi
(
j
);
return
read_input
(
input
[
reduce_idx
+
base_idx
]);
});
if
(
idx
.
local
==
0
)
output
.
data
()[
out_idx
]
=
read_output
(
r
);
});
});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/device/include/migraphx/gpu/device/shape.hpp
0 → 100644
View file @
be5f3539
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_SHAPE_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_SHAPE_HPP
#include <migraphx/gpu/device/array.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
template
<
std
::
size_t
N
>
struct
hip_shape
{
using
hip_index
=
hip_array
<
std
::
size_t
,
N
>
;
hip_array
<
std
::
size_t
,
N
>
lens
=
{};
hip_array
<
std
::
size_t
,
N
>
strides
=
{};
bool
standard
=
false
;
__device__
__host__
hip_shape
()
=
default
;
hip_shape
(
const
shape
&
s
)
:
standard
(
s
.
standard
())
{
assert
(
s
.
lens
().
size
()
==
N
);
assert
(
s
.
strides
().
size
()
==
N
);
std
::
copy
(
s
.
lens
().
begin
(),
s
.
lens
().
end
(),
lens
.
begin
());
std
::
copy
(
s
.
strides
().
begin
(),
s
.
strides
().
end
(),
strides
.
begin
());
}
MIGRAPHX_DEVICE_CONSTEXPR
std
::
size_t
elements
()
const
{
return
lens
.
product
();
}
MIGRAPHX_DEVICE_CONSTEXPR
std
::
size_t
index
(
hip_index
x
)
const
{
return
x
.
dot
(
strides
);
}
MIGRAPHX_DEVICE_CONSTEXPR
std
::
size_t
index
(
std
::
initializer_list
<
std
::
size_t
>
x
)
const
{
std
::
size_t
idx
=
0
;
for
(
std
::
size_t
i
=
0
;
i
<
x
.
size
();
i
++
)
idx
+=
*
(
x
.
begin
()
+
i
)
*
strides
[
i
];
return
idx
;
}
MIGRAPHX_DEVICE_CONSTEXPR
std
::
size_t
index
(
std
::
size_t
i
)
const
{
if
(
this
->
standard
)
return
i
;
else
{
const
std
::
size_t
rank
=
this
->
lens
.
size
();
std
::
size_t
s
=
1
;
std
::
size_t
result
=
0
;
for
(
std
::
size_t
j
=
0
;
j
<
this
->
lens
.
size
();
j
++
)
{
const
std
::
size_t
k
=
rank
-
j
-
1
;
const
std
::
size_t
stride
=
this
->
strides
[
k
];
const
std
::
size_t
len
=
this
->
lens
[
k
];
const
std
::
size_t
slen
=
s
*
len
;
const
std
::
size_t
idx
=
(
i
%
slen
)
/
s
;
result
+=
stride
*
idx
;
s
=
slen
;
}
return
result
;
}
}
MIGRAPHX_DEVICE_CONSTEXPR
hip_index
multi
(
std
::
size_t
idx
)
const
{
hip_index
result
;
std
::
size_t
tidx
=
idx
;
for
(
std
::
size_t
is
=
0
;
is
<
result
.
size
();
is
++
)
{
result
[
is
]
=
tidx
/
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
}
return
result
;
}
MIGRAPHX_DEVICE_CONSTEXPR
hip_index
carry
(
hip_index
result
)
const
{
std
::
ptrdiff_t
rem
=
0
;
for
(
std
::
ptrdiff_t
i
=
result
.
size
()
-
1
;
i
>=
0
;
i
--
)
{
auto
z
=
result
[
i
]
+
rem
;
rem
=
z
-
std
::
ptrdiff_t
(
lens
[
i
])
+
1
;
if
(
rem
>
0
)
z
-=
rem
;
else
rem
=
0
;
result
[
i
]
=
z
;
}
return
result
;
}
};
template
<
std
::
size_t
N
>
hip_shape
<
N
>
make_hip_shape
(
const
shape
&
x
)
{
return
x
;
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
View file @
be5f3539
#ifndef MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#include <hip/hip_runtime.h>
#include <migraphx/gpu/device/visit.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/config.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
template
<
class
F
>
template
<
std
::
size_t
NDim
>
void
visit_tensor_size
(
std
::
size_t
n
,
F
f
)
using
hip_tensor_index
=
hip_array
<
std
::
size_t
,
NDim
>
;
{
switch
(
n
)
{
case
1
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
1
>
{});
break
;
}
case
2
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
2
>
{});
break
;
}
case
3
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
3
>
{});
break
;
}
case
4
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
4
>
{});
break
;
}
case
5
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
5
>
{});
break
;
}
default:
throw
std
::
runtime_error
(
"Unknown tensor size"
);
}
}
template
<
size_t
NDim
>
template
<
std
::
size_t
NDim
>
struct
hip_index
{
size_t
d
[
NDim
];
__device__
__host__
size_t
&
operator
[](
size_t
i
)
{
return
d
[
i
];
}
__device__
__host__
size_t
operator
[](
size_t
i
)
const
{
return
d
[
i
];
}
};
template
<
size_t
NDim
>
struct
hip_tensor_descriptor
struct
hip_tensor_descriptor
{
{
__device__
__host__
hip_tensor_descriptor
()
=
default
;
__device__
__host__
hip_tensor_descriptor
()
=
default
;
...
@@ -63,12 +22,11 @@ struct hip_tensor_descriptor
...
@@ -63,12 +22,11 @@ struct hip_tensor_descriptor
std
::
copy
(
s
.
strides
().
begin
(),
s
.
strides
().
end
(),
strides
);
std
::
copy
(
s
.
strides
().
begin
(),
s
.
strides
().
end
(),
strides
);
}
}
__device__
__host__
hip_index
<
NDim
>
multi
(
size_t
idx
)
const
__device__
__host__
hip_
tensor_
index
<
NDim
>
multi
(
std
::
size_t
idx
)
const
{
{
hip_index
<
NDim
>
result
{};
hip_tensor_index
<
NDim
>
result
{};
size_t
tidx
=
idx
;
std
::
size_t
tidx
=
idx
;
for
(
std
::
size_t
is
=
0
;
is
<
NDim
;
is
++
)
for
(
size_t
is
=
0
;
is
<
NDim
;
is
++
)
{
{
result
[
is
]
=
tidx
/
strides
[
is
];
result
[
is
]
=
tidx
/
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
...
@@ -76,16 +34,15 @@ struct hip_tensor_descriptor
...
@@ -76,16 +34,15 @@ struct hip_tensor_descriptor
return
result
;
return
result
;
}
}
__device__
__host__
std
::
size_t
linear
(
hip_tensor_index
<
NDim
>
s
)
const
__device__
__host__
size_t
linear
(
hip_index
<
NDim
>
s
)
const
{
{
size_t
idx
=
0
;
std
::
size_t
idx
=
0
;
for
(
size_t
i
=
0
;
i
<
NDim
;
i
++
)
for
(
std
::
size_t
i
=
0
;
i
<
NDim
;
i
++
)
idx
+=
s
[
i
]
*
strides
[
i
];
idx
+=
s
[
i
]
*
strides
[
i
];
return
idx
;
return
idx
;
}
}
size_t
lens
[
NDim
]
=
{};
std
::
size_t
lens
[
NDim
]
=
{};
size_t
strides
[
NDim
]
=
{};
std
::
size_t
strides
[
NDim
]
=
{};
};
};
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/include/migraphx/gpu/device/tensor_view.hpp
0 → 100644
View file @
be5f3539
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_TENSOR_VIEW_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TENSOR_VIEW_HPP
#include <migraphx/gpu/device/shape.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
template
<
class
T
,
std
::
size_t
N
>
struct
hip_tensor_view
{
using
value_type
=
T
;
using
hip_index
=
typename
hip_shape
<
N
>::
hip_index
;
__device__
__host__
hip_tensor_view
()
=
default
;
__host__
hip_tensor_view
(
tensor_view
<
T
>
x
)
:
d
(
x
.
data
()),
s
(
x
.
get_shape
())
{}
__host__
hip_tensor_view
(
T
*
x
,
const
shape
&
ss
)
:
d
(
x
),
s
(
ss
)
{}
MIGRAPHX_DEVICE_CONSTEXPR
const
hip_shape
<
N
>&
get_shape
()
const
{
return
s
;
}
MIGRAPHX_DEVICE_CONSTEXPR
std
::
size_t
size
()
const
{
return
s
.
elements
();
}
MIGRAPHX_DEVICE_CONSTEXPR
value_type
*
data
()
const
{
return
d
;
}
template
<
class
U
>
MIGRAPHX_DEVICE_CONSTEXPR
value_type
&
operator
[](
U
i
)
const
{
return
d
[
s
.
index
(
i
)];
}
MIGRAPHX_DEVICE_CONSTEXPR
value_type
*
begin
()
const
{
return
d
;
}
MIGRAPHX_DEVICE_CONSTEXPR
value_type
*
end
()
const
{
return
d
+
size
();
}
private:
value_type
*
d
=
nullptr
;
hip_shape
<
N
>
s
{};
};
template
<
std
::
size_t
N
,
class
T
>
hip_tensor_view
<
T
,
N
>
make_hip_view
(
const
shape
&
s
,
T
*
x
)
{
return
{
x
,
s
};
}
template
<
std
::
size_t
N
,
class
T
>
hip_tensor_view
<
T
,
N
>
make_hip_view
(
tensor_view
<
T
>
x
)
{
return
{
x
};
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
View file @
be5f3539
...
@@ -8,14 +8,45 @@
...
@@ -8,14 +8,45 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#include <hip/hip_runtime.h>
#include <migraphx/half.hpp>
#include <migraphx/half.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <migraphx/tensor_view.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
#define MIGRAPHX_DEVICE_CONSTEXPR constexpr __device__ __host__ // NOLINT
template
<
class
T
,
std
::
size_t
N
>
using
vec
=
T
__attribute__
((
ext_vector_type
(
N
)));
template
<
std
::
size_t
N
,
class
T
>
__device__
__host__
T
*
as_pointer
(
vec
<
T
,
N
>*
x
)
{
return
reinterpret_cast
<
T
*>
(
x
);
}
template
<
std
::
size_t
N
,
class
T
>
__device__
__host__
vec
<
T
,
N
>*
as_vec
(
T
*
x
)
{
return
reinterpret_cast
<
vec
<
T
,
N
>*>
(
x
);
}
template
<
std
::
size_t
N
,
class
T
>
tensor_view
<
vec
<
T
,
N
>>
as_vec
(
tensor_view
<
T
>
x
)
{
return
{
x
.
get_shape
(),
as_vec
<
N
>
(
x
.
data
())};
}
template
<
std
::
size_t
N
,
class
...
Ts
>
auto
pack_vec
(
Ts
...
xs
)
{
return
[
=
](
auto
f
,
std
::
size_t
n
)
{
return
f
(
as_vec
<
N
>
(
xs
)[
n
]...);
};
}
using
gpu_half
=
__fp16
;
using
gpu_half
=
__fp16
;
namespace
detail
{
namespace
detail
{
...
@@ -25,6 +56,12 @@ struct device_type
...
@@ -25,6 +56,12 @@ struct device_type
using
type
=
T
;
using
type
=
T
;
};
};
template
<
class
T
,
std
::
size_t
N
>
struct
device_type
<
vec
<
T
,
N
>>
{
using
type
=
vec
<
typename
device_type
<
T
>::
type
,
N
>
;
};
template
<
>
template
<
>
struct
device_type
<
half
>
struct
device_type
<
half
>
{
{
...
@@ -38,7 +75,7 @@ struct host_type
...
@@ -38,7 +75,7 @@ struct host_type
};
};
template
<
>
template
<
>
struct
device
_type
<
gpu_half
>
struct
host
_type
<
gpu_half
>
{
{
using
type
=
half
;
using
type
=
half
;
};
};
...
@@ -54,7 +91,7 @@ using device_type = typename detail::device_type<T>::type;
...
@@ -54,7 +91,7 @@ using device_type = typename detail::device_type<T>::type;
template
<
class
T
>
template
<
class
T
>
host_type
<
T
>
host_cast
(
T
x
)
host_type
<
T
>
host_cast
(
T
x
)
{
{
return
reinterpret_cast
<
host_type
<
T
>>
(
x
);
return
reinterpret_cast
<
const
host_type
<
T
>
&
>
(
x
);
}
}
template
<
class
T
>
template
<
class
T
>
...
@@ -64,9 +101,9 @@ host_type<T>* host_cast(T* x)
...
@@ -64,9 +101,9 @@ host_type<T>* host_cast(T* x)
}
}
template
<
class
T
>
template
<
class
T
>
device_type
<
T
>
device_cast
(
T
x
)
device_type
<
T
>
device_cast
(
const
T
&
x
)
{
{
return
reinterpret_cast
<
device_type
<
T
>>
(
x
);
return
reinterpret_cast
<
const
device_type
<
T
>
&
>
(
x
);
}
}
template
<
class
T
>
template
<
class
T
>
...
@@ -76,13 +113,19 @@ device_type<T>* device_cast(T* x)
...
@@ -76,13 +113,19 @@ device_type<T>* device_cast(T* x)
}
}
template
<
class
T
>
template
<
class
T
>
T
to_hip_type
(
T
x
)
tensor_view
<
device_type
<
T
>>
device_cast
(
tensor_view
<
T
>
x
)
{
return
{
x
.
get_shape
(),
reinterpret_cast
<
device_type
<
T
>*>
(
x
.
data
())};
}
template
<
class
T
>
__device__
__host__
T
to_hip_type
(
T
x
)
{
{
return
x
;
return
x
;
}
}
// Hip doens't support __fp16
// Hip doens't support __fp16
inline
float
to_hip_type
(
gpu_half
x
)
{
return
x
;
}
inline
__device__
__host__
float
to_hip_type
(
gpu_half
x
)
{
return
x
;
}
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/device/include/migraphx/gpu/device/vector.hpp
0 → 100644
View file @
be5f3539
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_VECTOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_VECTOR_HPP
#include <migraphx/gpu/device/types.hpp>
#include <vector>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
template
<
class
T
,
std
::
size_t
N
>
struct
hip_vector
{
MIGRAPHX_DEVICE_CONSTEXPR
hip_vector
()
=
default
;
MIGRAPHX_DEVICE_CONSTEXPR
hip_vector
(
std
::
size_t
s
)
:
len
(
s
)
{}
template
<
class
Iterator
>
__device__
__host__
hip_vector
(
Iterator
start
,
Iterator
last
)
{
auto
it
=
std
::
copy
(
start
,
last
,
d
);
len
=
std
::
distance
(
d
,
it
);
}
__device__
__host__
hip_vector
(
std
::
initializer_list
<
T
>
x
)
{
std
::
copy
(
x
.
begin
(),
x
.
end
(),
d
);
len
=
x
.
size
();
}
MIGRAPHX_DEVICE_CONSTEXPR
T
&
operator
[](
std
::
size_t
i
)
{
return
d
[
i
];
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
&
operator
[](
std
::
size_t
i
)
const
{
return
d
[
i
];
}
MIGRAPHX_DEVICE_CONSTEXPR
T
&
front
()
{
return
d
[
0
];
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
&
front
()
const
{
return
d
[
0
];
}
MIGRAPHX_DEVICE_CONSTEXPR
T
&
back
()
{
return
d
[
size
()
-
1
];
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
&
back
()
const
{
return
d
[
size
()
-
1
];
}
MIGRAPHX_DEVICE_CONSTEXPR
T
*
data
()
{
return
d
;
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
*
data
()
const
{
return
d
;
}
MIGRAPHX_DEVICE_CONSTEXPR
std
::
size_t
size
()
const
{
return
len
;
}
MIGRAPHX_DEVICE_CONSTEXPR
T
*
begin
()
{
return
d
;
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
*
begin
()
const
{
return
d
;
}
MIGRAPHX_DEVICE_CONSTEXPR
T
*
end
()
{
return
d
+
size
();
}
MIGRAPHX_DEVICE_CONSTEXPR
const
T
*
end
()
const
{
return
d
+
size
();
}
template
<
class
U
>
MIGRAPHX_DEVICE_CONSTEXPR
void
push_back
(
U
&&
x
)
{
d
[
len
]
=
static_cast
<
U
&&>
(
x
);
len
++
;
}
private:
T
d
[
N
]
=
{};
std
::
size_t
len
=
0
;
};
template
<
std
::
size_t
N
,
class
T
>
hip_vector
<
T
,
N
>
to_hip_vector
(
const
std
::
vector
<
T
>&
x
)
{
hip_vector
<
T
,
N
>
result
(
x
.
size
());
std
::
copy
(
x
.
begin
(),
x
.
end
(),
result
.
begin
());
return
result
;
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp
0 → 100644
View file @
be5f3539
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_VISIT_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_VISIT_HPP
#include <migraphx/gpu/device/tensor_view.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
template
<
class
F
>
void
visit_tensor_size
(
std
::
size_t
n
,
F
f
)
{
switch
(
n
)
{
case
1
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
1
>
{});
break
;
}
case
2
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
2
>
{});
break
;
}
case
3
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
3
>
{});
break
;
}
case
4
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
4
>
{});
break
;
}
case
5
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
5
>
{});
break
;
}
default:
throw
std
::
runtime_error
(
"Unknown tensor size"
);
}
}
inline
shape
get_shape
(
const
shape
&
x
)
{
return
x
;
}
template
<
class
T
>
auto
get_shape
(
const
T
&
x
)
->
decltype
(
x
.
get_shape
())
{
return
x
.
get_shape
();
}
template
<
class
V
,
class
F
,
class
...
Ts
>
void
hip_visit_all_impl
(
const
shape
&
s
,
F
f
,
V
&&
v
,
Ts
&&
...
xs
)
{
std
::
initializer_list
<
migraphx
::
shape
::
type_t
>
types
=
{
get_shape
(
xs
).
type
()...};
if
(
!
std
::
all_of
(
types
.
begin
(),
types
.
end
(),
[
&
](
migraphx
::
shape
::
type_t
t
)
{
return
t
==
s
.
type
();
}))
MIGRAPHX_THROW
(
"Types must be the same"
);
std
::
initializer_list
<
std
::
size_t
>
ranks
=
{
get_shape
(
xs
).
lens
().
size
()...};
if
(
!
std
::
all_of
(
ranks
.
begin
(),
ranks
.
end
(),
[
&
](
std
::
size_t
r
)
{
return
r
==
s
.
lens
().
size
();
}))
MIGRAPHX_THROW
(
"Ranks must be the same"
);
visit_tensor_size
(
s
.
lens
().
size
(),
[
&
](
auto
ndim
)
{
s
.
visit_type
([
&
](
auto
as
)
{
v
(
f
(
xs
,
ndim
,
as
)...);
});
});
}
template
<
class
V
,
class
F
,
class
...
Ts
>
void
hip_visit_views_impl
(
const
shape
&
s
,
F
f
,
V
&&
v
,
Ts
&&
...
xs
)
{
std
::
initializer_list
<
std
::
size_t
>
ranks
=
{
get_shape
(
xs
).
lens
().
size
()...};
if
(
!
std
::
all_of
(
ranks
.
begin
(),
ranks
.
end
(),
[
&
](
std
::
size_t
r
)
{
return
r
==
s
.
lens
().
size
();
}))
MIGRAPHX_THROW
(
"Ranks must be the same"
);
visit_tensor_size
(
s
.
lens
().
size
(),
[
&
](
auto
ndim
)
{
v
(
f
(
xs
,
ndim
)...);
});
}
template
<
class
F
>
struct
hip_convert
{
F
f
;
template
<
class
RawData
,
class
N
,
class
As
>
auto
operator
()(
RawData
x
,
N
ndim
,
As
as
)
const
->
decltype
(
make_hip_view
<
ndim
>
(
x
.
get_shape
(),
f
(
as
.
from
(
x
.
data
()))))
{
return
make_hip_view
<
ndim
>
(
x
.
get_shape
(),
f
(
as
.
from
(
x
.
data
())));
}
template
<
class
N
,
class
As
>
auto
operator
()(
const
shape
&
s
,
N
ndim
,
As
)
const
{
return
make_hip_shape
<
ndim
>
(
s
);
}
};
template
<
class
F
>
hip_convert
<
F
>
make_hip_convert
(
F
f
)
{
return
{
f
};
}
template
<
class
F
>
struct
hip_convert_view
{
F
f
;
template
<
class
T
,
class
N
>
auto
operator
()(
tensor_view
<
T
>
x
,
N
ndim
)
const
{
return
make_hip_view
<
ndim
>
(
f
(
x
));
}
template
<
class
N
>
auto
operator
()(
const
shape
&
s
,
N
ndim
)
const
{
return
make_hip_shape
<
ndim
>
(
s
);
}
};
template
<
class
F
>
hip_convert_view
<
F
>
make_hip_convert_view
(
F
f
)
{
return
{
f
};
}
template
<
class
T
,
class
...
Ts
>
auto
hip_visit_all
(
T
&&
x
,
Ts
&&
...
xs
)
{
return
[
&
](
auto
f
)
{
hip_visit_all_impl
(
get_shape
(
x
),
make_hip_convert
([](
auto
*
p
)
{
return
device_cast
(
p
);
}),
f
,
x
,
xs
...);
};
}
template
<
std
::
size_t
N
,
class
T
,
class
...
Ts
>
auto
hip_vec_visit_all
(
T
&&
x
,
Ts
&&
...
xs
)
{
return
[
&
](
auto
f
)
{
hip_visit_all_impl
(
get_shape
(
x
),
make_hip_convert
([](
auto
*
p
)
{
return
as_vec
<
N
>
(
device_cast
(
p
));
}),
f
,
x
,
xs
...);
};
}
template
<
class
T
,
class
...
Ts
>
auto
hip_pointer_visit_all
(
T
&&
x
,
Ts
&&
...
xs
)
{
return
[
&
](
auto
f
)
{
visit_all
(
x
,
xs
...)([
&
](
auto
...
vs
)
{
f
(
device_cast
(
vs
.
data
())...);
});
};
}
template
<
class
T
,
class
...
Ts
>
auto
hip_visit_views
(
T
&&
x
,
Ts
&&
...
xs
)
{
return
[
&
](
auto
f
)
{
hip_visit_views_impl
(
get_shape
(
x
),
make_hip_convert_view
([](
auto
v
)
{
return
device_cast
(
v
);
}),
f
,
x
,
xs
...);
};
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
#endif
src/targets/gpu/device/logsoftmax.cpp
View file @
be5f3539
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/logsoftmax.hpp>
#include <migraphx/gpu/device/logsoftmax.hpp>
#include <migraphx/gpu/device/reduce.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/types.hpp>
...
@@ -11,66 +12,45 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -11,66 +12,45 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
argument
logsoftmax
(
hipStream_t
stream
,
void
logsoftmax
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int
axis
)
const
migraphx
::
shape
&
output_shape
,
std
::
vector
<
migraphx
::
argument
>
args
,
int
axis
)
{
{
auto
lens
=
result
.
get_shape
().
lens
();
auto
lens
=
output_shape
.
lens
();
auto
batch_lens
=
lens
;
auto
num_in_batch
=
lens
[
axis
];
std
::
size_t
batch_item_num
=
lens
[
axis
];
auto
batch_lens
=
lens
;
batch_lens
[
axis
]
=
1
;
batch_lens
[
axis
]
=
1
;
migraphx
::
shape
batch_shape
{
result
.
get_shape
().
type
(),
batch_lens
};
migraphx
::
shape
batch_shape
{
output_shape
.
type
(),
batch_lens
};
hip_visit_all
(
result
,
arg
,
batch_shape
)([
&
](
auto
output
,
auto
input
,
auto
batch
)
{
visit_all
(
args
.
back
(),
args
.
front
())([
&
](
auto
output
,
auto
input
)
{
const
std
::
size_t
max_block_size
=
256
;
const
auto
*
input_ptr
=
device_cast
(
input
.
data
());
const
std
::
size_t
block_size
=
compute_block_size
(
batch_item_num
,
max_block_size
);
auto
*
output_ptr
=
device_cast
(
output
.
data
());
gs_launch
(
stream
,
visit_tensor_size
(
batch_shape
.
lens
().
size
(),
[
&
](
auto
n_dim
)
{
batch_shape
.
elements
()
*
block_size
,
hip_tensor_descriptor
<
n_dim
>
desc_batch
(
batch_shape
);
block_size
)([
=
](
auto
i
,
auto
idx
)
__device__
{
hip_tensor_descriptor
<
n_dim
>
desc_data
(
output_shape
);
auto
data_idx
=
batch
.
multi
(
i
/
block_size
);
using
type
=
device_type
<
std
::
remove_cv_t
<
typename
decltype
(
input
)
::
value_type
>>
;
// each thread is for one item in the batch
type
init
=
lowest
();
gs_launch
(
stream
,
batch_shape
.
elements
())([
=
](
auto
i
)
{
auto
batch_idx
=
desc_batch
.
multi
(
i
);
auto
batch_max
=
block_reduce
<
max_block_size
>
(
auto
data_idx
=
batch_idx
;
idx
,
max
{},
init
,
batch_item_num
,
[
&
](
auto
j
)
__device__
{
// get max
auto
batch_max
=
input_ptr
[
desc_data
.
linear
(
batch_idx
)];
for
(
std
::
size_t
j
=
1
;
j
<
num_in_batch
;
++
j
)
{
data_idx
[
axis
]
=
j
;
data_idx
[
axis
]
=
j
;
size_t
idx
=
desc_data
.
linear
(
data_idx
);
return
input
[
data_idx
];
batch_max
=
std
::
max
(
to_hip_type
(
batch_max
),
to_hip_type
(
input_ptr
[
idx
]));
});
}
for
(
std
::
size_t
j
=
0
;
j
<
num_in_batch
;
++
j
)
auto
batch_sum
=
{
block_reduce
<
max_block_size
>
(
idx
,
sum
{},
0
,
batch_item_num
,
[
&
](
auto
j
)
__device__
{
data_idx
[
axis
]
=
j
;
size_t
idx
=
desc_data
.
linear
(
data_idx
);
output_ptr
[
idx
]
=
input_ptr
[
idx
]
-
batch_max
;
}
auto
batch_sum
=
::
exp
(
to_hip_type
(
output_ptr
[
desc_data
.
linear
(
batch_idx
)]));
for
(
std
::
size_t
j
=
1
;
j
<
num_in_batch
;
++
j
)
{
data_idx
[
axis
]
=
j
;
data_idx
[
axis
]
=
j
;
size_t
idx
=
desc_data
.
linear
(
data_idx
);
auto
val
=
input
[
data_idx
]
-
batch_max
;
batch_sum
+=
::
exp
(
to_hip_type
(
output_ptr
[
idx
]));
return
::
exp
(
to_hip_type
(
val
));
}
});
batch_sum
=
::
log
(
to_hip_type
(
batch_sum
));
for
(
std
::
size_t
j
=
0
;
j
<
num_in_batch
;
++
j
)
auto
log_batch_sum
=
::
log
(
to_hip_type
(
batch_sum
))
+
batch_max
;
{
data_idx
[
axis
]
=
j
;
idx
.
local_stride
(
batch_item_num
,
[
&
](
auto
j
)
{
size_t
idx
=
desc_data
.
linear
(
data_idx
);
data_idx
[
axis
]
=
j
;
output_ptr
[
idx
]
-=
batch_sum
;
output
[
data_idx
]
=
input
[
data_idx
]
-
log_batch_sum
;
}
});
});
});
});
});
});
return
args
.
back
();
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/pad.cpp
View file @
be5f3539
...
@@ -4,6 +4,7 @@
...
@@ -4,6 +4,7 @@
#include <migraphx/gpu/device/pad.hpp>
#include <migraphx/gpu/device/pad.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/float_equal.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -14,24 +15,26 @@ argument
...
@@ -14,24 +15,26 @@ argument
pad
(
hipStream_t
stream
,
argument
result
,
argument
arg1
,
float
value
,
std
::
vector
<
std
::
int64_t
>
pads
)
pad
(
hipStream_t
stream
,
argument
result
,
argument
arg1
,
float
value
,
std
::
vector
<
std
::
int64_t
>
pads
)
{
{
std
::
size_t
nelements
=
arg1
.
get_shape
().
elements
();
std
::
size_t
nelements
=
arg1
.
get_shape
().
elements
();
hip_visit_all
(
result
,
arg1
)([
&
](
auto
output
,
auto
input
)
{
using
type
=
typename
decltype
(
output
)
::
value_type
;
using
hip_index
=
typename
decltype
(
output
)
::
hip_index
;
type
device_val
=
value
;
if
(
float_equal
(
value
,
std
::
numeric_limits
<
float
>::
lowest
()))
{
device_val
=
device_cast
(
std
::
numeric_limits
<
type
>::
lowest
());
}
gs_launch
(
stream
,
result
.
get_shape
().
elements
())([
=
](
auto
i
)
{
output
.
data
()[
i
]
=
device_val
;
});
nary
(
stream
,
result
)([
=
]
{
return
value
;
});
hip_index
offsets
;
visit_all
(
result
,
arg1
)([
&
](
auto
output
,
auto
input
)
{
std
::
copy
(
pads
.
begin
(),
pads
.
begin
()
+
offsets
.
size
(),
offsets
.
begin
());
visit_tensor_size
(
result
.
get_shape
().
lens
().
size
(),
[
&
](
auto
ndim
)
{
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
std
::
size_t
offsets
[
ndim
];
auto
idx
=
input
.
get_shape
().
multi
(
i
);
std
::
copy
(
pads
.
begin
(),
pads
.
begin
()
+
ndim
,
offsets
);
for
(
std
::
size_t
j
=
0
;
j
<
offsets
.
size
();
j
++
)
auto
*
outptr
=
output
.
data
();
{
const
auto
*
inptr
=
input
.
data
();
idx
[
j
]
+=
offsets
[
j
];
hip_tensor_descriptor
<
ndim
>
desc_input
(
input
.
get_shape
());
}
hip_tensor_descriptor
<
ndim
>
desc_output
(
output
.
get_shape
());
output
[
idx
]
=
input
.
data
()[
i
];
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
auto
idx
=
desc_input
.
multi
(
i
);
for
(
std
::
size_t
j
=
0
;
j
<
ndim
;
j
++
)
{
idx
[
j
]
+=
offsets
[
j
];
}
outptr
[
desc_output
.
linear
(
idx
)]
=
inptr
[
i
];
});
});
});
});
});
return
result
;
return
result
;
...
...
src/targets/gpu/device/reduce_sum.cpp
0 → 100644
View file @
be5f3539
#include <migraphx/gpu/device/reduce_sum.hpp>
#include <migraphx/gpu/device/reduce.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
void
reduce_sum
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
)
{
reduce
(
stream
,
result
,
arg
,
sum
{},
0
,
id
{},
id
{});
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
Prev
1
2
3
4
5
6
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment