Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
20b1d690
Commit
20b1d690
authored
Sep 20, 2019
by
Paul
Browse files
Merge branch 'develop' into tests
parents
17aaaa1e
ba729cfc
Changes
281
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
847 additions
and
125 deletions
+847
-125
src/targets/gpu/device/include/migraphx/gpu/device/shape.hpp
src/targets/gpu/device/include/migraphx/gpu/device/shape.hpp
+105
-0
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
...targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
+14
-54
src/targets/gpu/device/include/migraphx/gpu/device/tensor_view.hpp
...ts/gpu/device/include/migraphx/gpu/device/tensor_view.hpp
+59
-0
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
+49
-6
src/targets/gpu/device/include/migraphx/gpu/device/vector.hpp
...targets/gpu/device/include/migraphx/gpu/device/vector.hpp
+76
-0
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp
+170
-0
src/targets/gpu/device/int8_gemm_pack.cpp
src/targets/gpu/device/int8_gemm_pack.cpp
+77
-0
src/targets/gpu/device/logsoftmax.cpp
src/targets/gpu/device/logsoftmax.cpp
+37
-48
src/targets/gpu/device/mul_add.cpp
src/targets/gpu/device/mul_add.cpp
+21
-0
src/targets/gpu/device/pad.cpp
src/targets/gpu/device/pad.cpp
+20
-17
src/targets/gpu/device/pow.cpp
src/targets/gpu/device/pow.cpp
+18
-0
src/targets/gpu/device/reduce_mean.cpp
src/targets/gpu/device/reduce_mean.cpp
+18
-0
src/targets/gpu/device/reduce_sum.cpp
src/targets/gpu/device/reduce_sum.cpp
+18
-0
src/targets/gpu/device/relu.cpp
src/targets/gpu/device/relu.cpp
+17
-0
src/targets/gpu/device/round.cpp
src/targets/gpu/device/round.cpp
+18
-0
src/targets/gpu/device/rsqrt.cpp
src/targets/gpu/device/rsqrt.cpp
+18
-0
src/targets/gpu/device/sigmoid.cpp
src/targets/gpu/device/sigmoid.cpp
+18
-0
src/targets/gpu/device/sign.cpp
src/targets/gpu/device/sign.cpp
+18
-0
src/targets/gpu/device/softmax.cpp
src/targets/gpu/device/softmax.cpp
+59
-0
src/targets/gpu/device/sqdiff.cpp
src/targets/gpu/device/sqdiff.cpp
+17
-0
No files found.
src/targets/gpu/device/include/migraphx/gpu/device/shape.hpp
0 → 100644
View file @
20b1d690
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_SHAPE_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_SHAPE_HPP

#include <migraphx/gpu/device/array.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Fixed-rank, trivially-copyable mirror of migraphx::shape usable in device
// code. The rank N is a compile-time constant so the lens/strides arrays can
// live by value (no heap) and be passed into HIP kernels.
template <std::size_t N>
struct hip_shape
{
    // Multi-dimensional index type matching this shape's rank.
    using hip_index = hip_array<std::size_t, N>;

    hip_array<std::size_t, N> lens    = {};
    hip_array<std::size_t, N> strides = {};
    // True when the shape is "standard" (packed, non-permuted), which lets
    // index(std::size_t) short-circuit to the identity mapping.
    bool standard = false;

    __device__ __host__ hip_shape() = default;

    // Host-only conversion from the runtime shape. The source rank must
    // match N exactly (checked by assert in debug builds only).
    hip_shape(const shape& s) : standard(s.standard())
    {
        assert(s.lens().size() == N);
        assert(s.strides().size() == N);
        std::copy(s.lens().begin(), s.lens().end(), lens.begin());
        std::copy(s.strides().begin(), s.strides().end(), strides.begin());
    }

    // Total number of elements (product of all dimension lengths).
    MIGRAPHX_DEVICE_CONSTEXPR std::size_t elements() const { return lens.product(); }

    // Linear offset of a multi-dimensional index: dot product with strides.
    MIGRAPHX_DEVICE_CONSTEXPR std::size_t index(hip_index x) const { return x.dot(strides); }

    // Linear offset from a braced index list; entries beyond x.size() are
    // treated as zero (only the first x.size() strides are used).
    MIGRAPHX_DEVICE_CONSTEXPR std::size_t index(std::initializer_list<std::size_t> x) const
    {
        std::size_t idx = 0;
        for(std::size_t i = 0; i < x.size(); i++)
            idx += *(x.begin() + i) * strides[i];
        return idx;
    }

    // Map a logical element ordinal (row-major over lens) to a linear data
    // offset, honoring arbitrary strides. For standard shapes this is the
    // identity; otherwise decompose i digit-by-digit from the innermost
    // dimension outward and accumulate stride * digit.
    MIGRAPHX_DEVICE_CONSTEXPR std::size_t index(std::size_t i) const
    {
        if(this->standard)
            return i;
        else
        {
            const std::size_t rank = this->lens.size();
            std::size_t s          = 1;
            std::size_t result     = 0;
            for(std::size_t j = 0; j < this->lens.size(); j++)
            {
                // Walk dimensions from last (fastest-varying) to first.
                const std::size_t k      = rank - j - 1;
                const std::size_t stride = this->strides[k];
                const std::size_t len    = this->lens[k];
                const std::size_t slen   = s * len;
                // Digit of i in the mixed-radix system defined by lens.
                const std::size_t idx = (i % slen) / s;
                result += stride * idx;
                s = slen;
            }
            return result;
        }
    }

    // Decompose a linear offset into a multi-dimensional index by repeated
    // division by the strides.
    // NOTE(review): this assumes strides describe a packed, descending
    // layout (as produced for standard shapes); for broadcast strides
    // containing zeros this would divide by zero — confirm callers only use
    // it with packed shapes.
    MIGRAPHX_DEVICE_CONSTEXPR hip_index multi(std::size_t idx) const
    {
        hip_index result;
        std::size_t tidx = idx;
        for(std::size_t is = 0; is < result.size(); is++)
        {
            result[is] = tidx / strides[is];
            tidx       = tidx % strides[is];
        }
        return result;
    }

    // Normalize a possibly out-of-range multi-index by carrying overflow
    // from each dimension into the next-more-significant one (like ripple
    // carry in mixed-radix arithmetic), processing dimensions right-to-left.
    MIGRAPHX_DEVICE_CONSTEXPR hip_index carry(hip_index result) const
    {
        std::ptrdiff_t rem = 0;
        for(std::ptrdiff_t i = result.size() - 1; i >= 0; i--)
        {
            auto z = result[i] + rem;
            // Amount by which this digit exceeds its radix (lens[i]).
            rem = z - std::ptrdiff_t(lens[i]) + 1;
            if(rem > 0)
                z -= rem;
            else
                rem = 0;
            result[i] = z;
        }
        return result;
    }
};

// Convenience factory: converts a runtime shape to a hip_shape<N> via the
// converting constructor (rank mismatch is caught by its asserts).
template <std::size_t N>
hip_shape<N> make_hip_shape(const shape& x)
{
    return x;
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/device/include/migraphx/gpu/device/tensor.hpp
View file @
20b1d690
#ifndef MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEAVICE_TENSOR_HPP
#include <hip/hip_runtime.h>
#include <migraphx/gpu/device/visit.hpp>
#include <migraphx/functional.hpp>
#include <migraphx/config.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
template
<
class
F
>
template
<
std
::
size_t
NDim
>
void
visit_tensor_size
(
std
::
size_t
n
,
F
f
)
using
hip_tensor_index
=
hip_array
<
std
::
size_t
,
NDim
>
;
{
switch
(
n
)
{
case
1
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
1
>
{});
break
;
}
case
2
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
2
>
{});
break
;
}
case
3
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
3
>
{});
break
;
}
case
4
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
4
>
{});
break
;
}
case
5
:
{
f
(
std
::
integral_constant
<
std
::
size_t
,
5
>
{});
break
;
}
default:
throw
std
::
runtime_error
(
"Unknown tensor size"
);
}
}
template
<
size_t
NDim
>
template
<
std
::
size_t
NDim
>
struct
hip_index
{
size_t
d
[
NDim
];
__device__
__host__
size_t
&
operator
[](
size_t
i
)
{
return
d
[
i
];
}
__device__
__host__
size_t
operator
[](
size_t
i
)
const
{
return
d
[
i
];
}
};
template
<
size_t
NDim
>
struct
hip_tensor_descriptor
struct
hip_tensor_descriptor
{
{
__device__
__host__
hip_tensor_descriptor
()
=
default
;
__device__
__host__
hip_tensor_descriptor
()
=
default
;
...
@@ -63,26 +22,27 @@ struct hip_tensor_descriptor
...
@@ -63,26 +22,27 @@ struct hip_tensor_descriptor
std
::
copy
(
s
.
strides
().
begin
(),
s
.
strides
().
end
(),
strides
);
std
::
copy
(
s
.
strides
().
begin
(),
s
.
strides
().
end
(),
strides
);
}
}
__device__
__host__
hip_index
<
NDim
>
multi
(
size_t
idx
)
const
__device__
__host__
hip_
tensor_
index
<
NDim
>
multi
(
std
::
size_t
idx
)
const
{
{
hip_index
<
NDim
>
result
{};
hip_
tensor_
index
<
NDim
>
result
{};
size_t
tidx
=
idx
;
std
::
size_t
tidx
=
idx
;
for
(
size_t
is
=
0
;
is
<
NDim
;
is
++
)
for
(
std
::
size_t
is
=
0
;
is
<
NDim
;
is
++
)
{
{
result
[
is
]
=
tidx
/
strides
[
is
];
result
[
is
]
=
tidx
/
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
tidx
=
tidx
%
strides
[
is
];
}
}
return
result
;
return
result
;
}
}
__device__
__host__
size_t
linear
(
hip_index
<
NDim
>
s
)
const
__device__
__host__
std
::
size_t
linear
(
hip_
tensor_
index
<
NDim
>
s
)
const
{
{
size_t
idx
=
0
;
std
::
size_t
idx
=
0
;
for
(
size_t
i
=
0
;
i
<
NDim
;
i
++
)
for
(
std
::
size_t
i
=
0
;
i
<
NDim
;
i
++
)
idx
+=
s
[
i
]
*
strides
[
i
];
idx
+=
s
[
i
]
*
strides
[
i
];
return
idx
;
return
idx
;
}
}
size_t
lens
[
NDim
]
=
{};
std
::
size_t
lens
[
NDim
]
=
{};
size_t
strides
[
NDim
]
=
{};
std
::
size_t
strides
[
NDim
]
=
{};
};
};
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/include/migraphx/gpu/device/tensor_view.hpp
0 → 100644
View file @
20b1d690
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_TENSOR_VIEW_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_TENSOR_VIEW_HPP

#include <migraphx/gpu/device/shape.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Non-owning, fixed-rank view over a buffer (pointer + hip_shape<N>) that is
// trivially copyable into HIP kernels. Indexing goes through the shape so
// strided/non-standard layouts are handled by hip_shape::index.
template <class T, std::size_t N>
struct hip_tensor_view
{
    using value_type = T;
    using hip_index  = typename hip_shape<N>::hip_index;

    __device__ __host__ hip_tensor_view() = default;
    // Host-only conversions: the hip_shape constructor they invoke is
    // host-only and asserts that the runtime rank equals N.
    __host__ hip_tensor_view(tensor_view<T> x) : d(x.data()), s(x.get_shape()) {}
    __host__ hip_tensor_view(T* x, const shape& ss) : d(x), s(ss) {}

    MIGRAPHX_DEVICE_CONSTEXPR const hip_shape<N>& get_shape() const { return s; }

    // Number of logical elements (product of the shape's lens).
    MIGRAPHX_DEVICE_CONSTEXPR std::size_t size() const { return s.elements(); }

    MIGRAPHX_DEVICE_CONSTEXPR value_type* data() const { return d; }

    // Element access by any index form hip_shape::index accepts (linear
    // ordinal, hip_index, or initializer list). Const member returning a
    // mutable reference: const-ness of the view does not protect the data.
    template <class U>
    MIGRAPHX_DEVICE_CONSTEXPR value_type& operator[](U i) const
    {
        return d[s.index(i)];
    }

    // begin()/end() iterate raw storage order, not logical order; for
    // non-standard shapes these differ.
    MIGRAPHX_DEVICE_CONSTEXPR value_type* begin() const { return d; }
    MIGRAPHX_DEVICE_CONSTEXPR value_type* end() const { return d + size(); }

    private:
    value_type* d = nullptr;
    hip_shape<N> s{};
};

// Build a view from a runtime shape and pointer; N must be supplied
// explicitly and match the shape's rank.
template <std::size_t N, class T>
hip_tensor_view<T, N> make_hip_view(const shape& s, T* x)
{
    return {x, s};
}

// Build a fixed-rank view from a dynamic-rank tensor_view.
template <std::size_t N, class T>
hip_tensor_view<T, N> make_hip_view(tensor_view<T> x)
{
    return {x};
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/device/include/migraphx/gpu/device/types.hpp
View file @
20b1d690
...
@@ -8,14 +8,45 @@
...
@@ -8,14 +8,45 @@
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#ifndef MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#define MIGRAPHX_GUARD_RTGLIB_GPU_DEVICE_TYPES_HPP
#include <hip/hip_runtime.h>
#include <migraphx/half.hpp>
#include <migraphx/half.hpp>
#include <migraphx/config.hpp>
#include <migraphx/config.hpp>
#include <migraphx/tensor_view.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
#define MIGRAPHX_DEVICE_CONSTEXPR constexpr __device__ __host__ // NOLINT
template
<
class
T
,
std
::
size_t
N
>
using
vec
=
T
__attribute__
((
ext_vector_type
(
N
)));
template
<
std
::
size_t
N
,
class
T
>
__device__
__host__
T
*
as_pointer
(
vec
<
T
,
N
>*
x
)
{
return
reinterpret_cast
<
T
*>
(
x
);
}
template
<
std
::
size_t
N
,
class
T
>
__device__
__host__
vec
<
T
,
N
>*
as_vec
(
T
*
x
)
{
return
reinterpret_cast
<
vec
<
T
,
N
>*>
(
x
);
}
template
<
std
::
size_t
N
,
class
T
>
tensor_view
<
vec
<
T
,
N
>>
as_vec
(
tensor_view
<
T
>
x
)
{
return
{
x
.
get_shape
(),
as_vec
<
N
>
(
x
.
data
())};
}
template
<
std
::
size_t
N
,
class
...
Ts
>
auto
pack_vec
(
Ts
...
xs
)
{
return
[
=
](
auto
f
,
std
::
size_t
n
)
{
return
f
(
as_vec
<
N
>
(
xs
)[
n
]...);
};
}
using
gpu_half
=
__fp16
;
using
gpu_half
=
__fp16
;
namespace
detail
{
namespace
detail
{
...
@@ -25,6 +56,12 @@ struct device_type
...
@@ -25,6 +56,12 @@ struct device_type
using
type
=
T
;
using
type
=
T
;
};
};
template
<
class
T
,
std
::
size_t
N
>
struct
device_type
<
vec
<
T
,
N
>>
{
using
type
=
vec
<
typename
device_type
<
T
>::
type
,
N
>
;
};
template
<
>
template
<
>
struct
device_type
<
half
>
struct
device_type
<
half
>
{
{
...
@@ -38,7 +75,7 @@ struct host_type
...
@@ -38,7 +75,7 @@ struct host_type
};
};
template
<
>
template
<
>
struct
device
_type
<
gpu_half
>
struct
host
_type
<
gpu_half
>
{
{
using
type
=
half
;
using
type
=
half
;
};
};
...
@@ -54,7 +91,7 @@ using device_type = typename detail::device_type<T>::type;
...
@@ -54,7 +91,7 @@ using device_type = typename detail::device_type<T>::type;
template
<
class
T
>
template
<
class
T
>
host_type
<
T
>
host_cast
(
T
x
)
host_type
<
T
>
host_cast
(
T
x
)
{
{
return
reinterpret_cast
<
host_type
<
T
>>
(
x
);
return
reinterpret_cast
<
const
host_type
<
T
>
&
>
(
x
);
}
}
template
<
class
T
>
template
<
class
T
>
...
@@ -64,9 +101,9 @@ host_type<T>* host_cast(T* x)
...
@@ -64,9 +101,9 @@ host_type<T>* host_cast(T* x)
}
}
template
<
class
T
>
template
<
class
T
>
device_type
<
T
>
device_cast
(
T
x
)
device_type
<
T
>
device_cast
(
const
T
&
x
)
{
{
return
reinterpret_cast
<
device_type
<
T
>>
(
x
);
return
reinterpret_cast
<
const
device_type
<
T
>
&
>
(
x
);
}
}
template
<
class
T
>
template
<
class
T
>
...
@@ -76,13 +113,19 @@ device_type<T>* device_cast(T* x)
...
@@ -76,13 +113,19 @@ device_type<T>* device_cast(T* x)
}
}
template
<
class
T
>
template
<
class
T
>
T
to_hip_type
(
T
x
)
tensor_view
<
device_type
<
T
>>
device_cast
(
tensor_view
<
T
>
x
)
{
return
{
x
.
get_shape
(),
reinterpret_cast
<
device_type
<
T
>*>
(
x
.
data
())};
}
template
<
class
T
>
__device__
__host__
T
to_hip_type
(
T
x
)
{
{
return
x
;
return
x
;
}
}
// Hip doesn't support __fp16
// Hip doesn't support __fp16
inline
float
to_hip_type
(
gpu_half
x
)
{
return
x
;
}
inline
__device__
__host__
float
to_hip_type
(
gpu_half
x
)
{
return
x
;
}
}
// namespace device
}
// namespace device
}
// namespace gpu
}
// namespace gpu
...
...
src/targets/gpu/device/include/migraphx/gpu/device/vector.hpp
0 → 100644
View file @
20b1d690
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_VECTOR_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_VECTOR_HPP

#include <migraphx/gpu/device/types.hpp>
#include <vector>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Fixed-capacity, variable-length vector with inline storage (no heap),
// usable in both host and device code. Capacity is the template parameter N.
// NOTE(review): no operation checks bounds — exceeding N in push_back or the
// range/initializer-list constructors writes past the inline array.
template <class T, std::size_t N>
struct hip_vector
{
    MIGRAPHX_DEVICE_CONSTEXPR hip_vector() = default;
    // Construct with logical size s; elements are value-initialized by the
    // in-class initializer of d.
    MIGRAPHX_DEVICE_CONSTEXPR hip_vector(std::size_t s) : len(s) {}

    // Copy the range [start, last) into inline storage.
    template <class Iterator>
    __device__ __host__ hip_vector(Iterator start, Iterator last)
    {
        auto it = std::copy(start, last, d);
        len     = std::distance(d, it);
    }

    __device__ __host__ hip_vector(std::initializer_list<T> x)
    {
        std::copy(x.begin(), x.end(), d);
        len = x.size();
    }

    MIGRAPHX_DEVICE_CONSTEXPR T& operator[](std::size_t i) { return d[i]; }
    MIGRAPHX_DEVICE_CONSTEXPR const T& operator[](std::size_t i) const { return d[i]; }

    MIGRAPHX_DEVICE_CONSTEXPR T& front() { return d[0]; }
    MIGRAPHX_DEVICE_CONSTEXPR const T& front() const { return d[0]; }

    // back() on an empty vector indexes d[-1] (undefined behavior), same as
    // std::vector's precondition.
    MIGRAPHX_DEVICE_CONSTEXPR T& back() { return d[size() - 1]; }
    MIGRAPHX_DEVICE_CONSTEXPR const T& back() const { return d[size() - 1]; }

    MIGRAPHX_DEVICE_CONSTEXPR T* data() { return d; }
    MIGRAPHX_DEVICE_CONSTEXPR const T* data() const { return d; }

    // Logical size (number of elements in use), not the capacity N.
    MIGRAPHX_DEVICE_CONSTEXPR std::size_t size() const { return len; }

    MIGRAPHX_DEVICE_CONSTEXPR T* begin() { return d; }
    MIGRAPHX_DEVICE_CONSTEXPR const T* begin() const { return d; }

    MIGRAPHX_DEVICE_CONSTEXPR T* end() { return d + size(); }
    MIGRAPHX_DEVICE_CONSTEXPR const T* end() const { return d + size(); }

    // Append by forwarding (static_cast<U&&> is std::forward spelled without
    // <utility>, presumably to keep device code header-light).
    template <class U>
    MIGRAPHX_DEVICE_CONSTEXPR void push_back(U&& x)
    {
        d[len] = static_cast<U&&>(x);
        len++;
    }

    private:
    T d[N]          = {};
    std::size_t len = 0;
};

// Host-side helper: copy a std::vector into a hip_vector<T, N>.
// NOTE(review): x.size() > N overflows the inline storage — callers are
// expected to guarantee the bound; confirm at call sites.
template <std::size_t N, class T>
hip_vector<T, N> to_hip_vector(const std::vector<T>& x)
{
    hip_vector<T, N> result(x.size());
    std::copy(x.begin(), x.end(), result.begin());
    return result;
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/device/include/migraphx/gpu/device/visit.hpp
0 → 100644
View file @
20b1d690
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_VISIT_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_VISIT_HPP

#include <migraphx/gpu/device/tensor_view.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Dispatch a runtime rank n (1..5) to a compile-time constant: f is invoked
// with std::integral_constant<std::size_t, n>, so the callee can use the
// rank as a template argument (e.g. hip_shape<ndim>). Ranks outside 1..5
// throw.
template <class F>
void visit_tensor_size(std::size_t n, F f)
{
    switch(n)
    {
    case 1:
    {
        f(std::integral_constant<std::size_t, 1>{});
        break;
    }
    case 2:
    {
        f(std::integral_constant<std::size_t, 2>{});
        break;
    }
    case 3:
    {
        f(std::integral_constant<std::size_t, 3>{});
        break;
    }
    case 4:
    {
        f(std::integral_constant<std::size_t, 4>{});
        break;
    }
    case 5:
    {
        f(std::integral_constant<std::size_t, 5>{});
        break;
    }
    default: throw std::runtime_error("Unknown tensor size");
    }
}

// Uniform shape accessor so the visit helpers below can accept either a
// bare shape or anything with a get_shape() member.
inline shape get_shape(const shape& x) { return x; }

template <class T>
auto get_shape(const T& x) -> decltype(x.get_shape())
{
    return x.get_shape();
}

// Core of hip_visit_all: validate that every argument has the same type and
// rank as the reference shape s, then lift the runtime rank and element type
// to compile time and pass each converted argument to v.
//   f — converter applied per argument (given the arg, rank constant, and
//       type-visitor 'as')
//   v — user visitor receiving the converted arguments
template <class V, class F, class... Ts>
void hip_visit_all_impl(const shape& s, F f, V&& v, Ts&&... xs)
{
    std::initializer_list<migraphx::shape::type_t> types = {get_shape(xs).type()...};
    if(!std::all_of(types.begin(), types.end(), [&](migraphx::shape::type_t t) {
           return t == s.type();
       }))
        MIGRAPHX_THROW("Types must be the same");
    std::initializer_list<std::size_t> ranks = {get_shape(xs).lens().size()...};
    if(!std::all_of(
           ranks.begin(), ranks.end(), [&](std::size_t r) { return r == s.lens().size(); }))
        MIGRAPHX_THROW("Ranks must be the same");
    visit_tensor_size(s.lens().size(), [&](auto ndim) {
        s.visit_type([&](auto as) { v(f(xs, ndim, as)...); });
    });
}

// Like hip_visit_all_impl but only checks rank equality (no element-type
// check, and the converter does not receive the type visitor).
template <class V, class F, class... Ts>
void hip_visit_views_impl(const shape& s, F f, V&& v, Ts&&... xs)
{
    std::initializer_list<std::size_t> ranks = {get_shape(xs).lens().size()...};
    if(!std::all_of(
           ranks.begin(), ranks.end(), [&](std::size_t r) { return r == s.lens().size(); }))
        MIGRAPHX_THROW("Ranks must be the same");
    visit_tensor_size(s.lens().size(), [&](auto ndim) { v(f(xs, ndim)...); });
}

// Converter used by hip_visit_all: turns raw-data arguments into fixed-rank
// hip_tensor_views (casting the pointer through f) and bare shapes into
// hip_shapes.
template <class F>
struct hip_convert
{
    F f;

    template <class RawData, class N, class As>
    auto operator()(RawData x, N ndim, As as) const
        -> decltype(make_hip_view<ndim>(x.get_shape(), f(as.from(x.data()))))
    {
        return make_hip_view<ndim>(x.get_shape(), f(as.from(x.data())));
    }

    template <class N, class As>
    auto operator()(const shape& s, N ndim, As) const
    {
        return make_hip_shape<ndim>(s);
    }
};

template <class F>
hip_convert<F> make_hip_convert(F f)
{
    return {f};
}

// Converter used by hip_visit_views: same idea but starts from already-typed
// tensor_views, so no type visitor is needed.
template <class F>
struct hip_convert_view
{
    F f;

    template <class T, class N>
    auto operator()(tensor_view<T> x, N ndim) const
    {
        return make_hip_view<ndim>(f(x));
    }

    template <class N>
    auto operator()(const shape& s, N ndim) const
    {
        return make_hip_shape<ndim>(s);
    }
};

template <class F>
hip_convert_view<F> make_hip_convert_view(F f)
{
    return {f};
}

// Visit any mix of arguments/shapes as device-ready views: returns a lambda
// that, given a visitor f, calls f with each argument converted to a
// hip_tensor_view (device-cast element type) or hip_shape. The first
// argument x supplies the reference shape for the type/rank checks.
template <class T, class... Ts>
auto hip_visit_all(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        hip_visit_all_impl(get_shape(x),
                           make_hip_convert([](auto* p) { return device_cast(p); }),
                           f,
                           x,
                           xs...);
    };
}

// Same as hip_visit_all but views the data as vectors of N elements
// (vec<T, N>) for vectorized kernels.
template <std::size_t N, class T, class... Ts>
auto hip_vec_visit_all(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        hip_visit_all_impl(get_shape(x),
                           make_hip_convert([](auto* p) { return as_vec<N>(device_cast(p)); }),
                           f,
                           x,
                           xs...);
    };
}

// Visit arguments as raw device pointers (no shape information) — wraps the
// generic visit_all and device-casts each view's data pointer.
template <class T, class... Ts>
auto hip_pointer_visit_all(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        visit_all(x, xs...)([&](auto... vs) { f(device_cast(vs.data())...); });
    };
}

// Visit already-typed tensor_views as fixed-rank device views (rank check
// only, no element-type check).
template <class T, class... Ts>
auto hip_visit_views(T&& x, Ts&&... xs)
{
    return [&](auto f) {
        hip_visit_views_impl(get_shape(x),
                             make_hip_convert_view([](auto v) { return device_cast(v); }),
                             f,
                             x,
                             xs...);
    };
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
src/targets/gpu/device/int8_gemm_pack.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/int8_gemm_pack.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/hip.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Repack the A matrix (last two dims of arg) into the blocked int8 layout:
// the K dimension is grouped in blocks of nb = 4, with the 4 K-values of
// each (m, k/4) cell stored contiguously — presumably the layout required
// by rocBLAS int8 gemm; TODO confirm against the gemm caller.
// Leading batch dimensions (if any) are preserved via 'offset'.
void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument& arg)
{
    auto comp_shape = arg.get_shape();
    auto out_lens   = comp_shape.lens();
    // dim_0 = K dimension, dim_1 = M dimension (last two axes).
    auto dim_0         = out_lens.size() - 2;
    auto dim_1         = out_lens.size() - 1;
    std::size_t lda    = comp_shape.strides()[dim_0];
    std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
    visit_all(result, arg)([&](auto output, auto input) {
        std::size_t nelements = comp_shape.elements();
        auto* out_ptr         = device_cast(output.data());
        auto* in_ptr          = device_cast(input.data());
        visit_tensor_size(out_lens.size(), [&](auto out_dim) {
            hip_tensor_descriptor<out_dim> desc(comp_shape);
            gs_launch(stream, nelements, 256)([=](auto ii) {
                // Block size of the packed K dimension.
                const size_t nb = 4;
                auto idx        = desc.multi(ii);
                std::size_t i_m = idx[dim_1];
                std::size_t i_k = idx[dim_0];
                // Start of this matrix within the batch (ii rounded down to
                // a multiple of the per-matrix element count).
                std::size_t offset = ii / m_size * m_size;
                out_ptr[i_k % nb + (i_m + (i_k / nb) * lda) * nb + offset] =
                    in_ptr[i_m + i_k * lda + offset];
            });
        });
    });
}

// Repack the B matrix into the same nb = 4 blocked layout. B is given
// transposed relative to the packed result, so the computation shape swaps
// the last two dims before decomposing linear indices.
void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument& arg)
{
    auto trans_shape = arg.get_shape();
    auto out_lens    = trans_shape.lens();
    auto dim_0       = trans_shape.lens().size() - 2;
    auto dim_1       = trans_shape.lens().size() - 1;
    std::size_t ldb  = trans_shape.strides()[dim_1];
    // Shape used to decode indices: same type, last two dims swapped.
    auto wrap_lens = out_lens;
    std::swap(wrap_lens[dim_0], wrap_lens[dim_1]);
    shape comp_shape{trans_shape.type(), wrap_lens};
    std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
    visit_all(result, arg)([&](auto output, auto input) {
        std::size_t nelements = comp_shape.elements();
        auto* out_ptr         = device_cast(output.data());
        auto* in_ptr          = device_cast(input.data());
        visit_tensor_size(out_lens.size(), [&](auto out_dim) {
            hip_tensor_descriptor<out_dim> desc(comp_shape);
            gs_launch(stream, nelements, 256)([=](auto ii) {
                const size_t nb = 4;
                auto idx        = desc.multi(ii);
                std::size_t i_n = idx[dim_1];
                std::size_t i_k = idx[dim_0];
                // Start of this matrix within the batch.
                std::size_t offset = ii / m_size * m_size;
                out_ptr[i_k % nb + (i_n + (i_k / nb) * ldb) * nb + offset] =
                    in_ptr[i_n + i_k * ldb + offset];
            });
        });
    });
}

// Block the host until all work queued on the stream has completed.
void sync_stream(hipStream_t stream) { hipStreamSynchronize(stream); }

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/logsoftmax.cpp
View file @
20b1d690
#include <migraphx/shape.hpp>
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/gpu/device/logsoftmax.hpp>
#include <migraphx/gpu/device/logsoftmax.hpp>
#include <migraphx/gpu/device/reduce.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/types.hpp>
...
@@ -11,57 +12,45 @@ inline namespace MIGRAPHX_INLINE_NS {
...
@@ -11,57 +12,45 @@ inline namespace MIGRAPHX_INLINE_NS {
namespace
gpu
{
namespace
gpu
{
namespace
device
{
namespace
device
{
argument
logsoftmax
(
hipStream_t
stream
,
void
logsoftmax
(
hipStream_t
stream
,
const
argument
&
result
,
const
argument
&
arg
,
int
axis
)
const
migraphx
::
shape
&
output_shape
,
std
::
vector
<
migraphx
::
argument
>
args
,
int
axis
)
{
{
auto
lens
=
result
.
get_shape
().
lens
();
auto
lens
=
output_shape
.
lens
();
auto
batch_lens
=
lens
;
std
::
size_t
batch_size
=
std
::
accumulate
(
std
::
size_t
batch_item_num
=
lens
[
axis
];
lens
.
begin
(),
lens
.
begin
()
+
axis
,
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
batch_lens
[
axis
]
=
1
;
std
::
size_t
n_dims
=
std
::
accumulate
(
migraphx
::
shape
batch_shape
{
result
.
get_shape
().
type
(),
batch_lens
};
lens
.
begin
()
+
axis
,
lens
.
end
(),
std
::
size_t
{
1
},
std
::
multiplies
<
std
::
size_t
>
());
migraphx
::
shape
comp_shape
{
output_shape
.
type
(),
{
batch_size
,
n_dims
}};
hip_visit_all
(
result
,
arg
,
batch_shape
)([
&
](
auto
output
,
auto
input
,
auto
batch
)
{
const
std
::
size_t
max_block_size
=
256
;
visit_all
(
args
.
back
(),
args
.
front
())([
&
](
auto
output
,
auto
input
)
{
const
std
::
size_t
block_size
=
compute_block_size
(
batch_item_num
,
max_block_size
);
const
auto
*
input_ptr
=
device_cast
(
input
.
data
());
gs_launch
(
stream
,
auto
*
output_ptr
=
device_cast
(
output
.
data
());
batch_shape
.
elements
()
*
block_size
,
block_size
)([
=
](
auto
i
,
auto
idx
)
__device__
{
// each thread is for one item in the batch
auto
data_idx
=
batch
.
multi
(
i
/
block_size
);
gs_launch
(
stream
,
batch_size
)([
=
](
auto
i
)
{
using
type
=
device_type
<
std
::
remove_cv_t
<
typename
decltype
(
input
)
::
value_type
>>
;
std
::
size_t
row_start
=
i
*
n_dims
;
type
init
=
lowest
();
// get max
auto
batch_max
=
input_ptr
[
row_start
];
auto
batch_max
=
block_reduce
<
max_block_size
>
(
for
(
std
::
size_t
j
=
1
;
j
<
n_dims
;
++
j
)
idx
,
max
{},
init
,
batch_item_num
,
[
&
](
auto
j
)
__device__
{
{
data_idx
[
axis
]
=
j
;
auto
ind
=
row_start
+
j
;
return
input
[
data_idx
];
batch_max
=
std
::
max
(
to_hip_type
(
batch_max
),
to_hip_type
(
input_ptr
[
ind
]));
});
}
auto
batch_sum
=
for
(
std
::
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
block_reduce
<
max_block_size
>
(
idx
,
sum
{},
0
,
batch_item_num
,
[
&
](
auto
j
)
__device__
{
{
data_idx
[
axis
]
=
j
;
auto
ind
=
row_start
+
j
;
auto
val
=
input
[
data_idx
]
-
batch_max
;
output_ptr
[
ind
]
=
input_ptr
[
ind
]
-
batch_max
;
return
::
exp
(
to_hip_type
(
val
));
}
});
auto
batch_sum
=
::
exp
(
to_hip_type
(
output_ptr
[
row_start
]));
auto
log_batch_sum
=
::
log
(
to_hip_type
(
batch_sum
))
+
batch_max
;
for
(
std
::
size_t
j
=
1
;
j
<
n_dims
;
++
j
)
{
idx
.
local_stride
(
batch_item_num
,
[
&
](
auto
j
)
{
auto
ind
=
row_start
+
j
;
data_idx
[
axis
]
=
j
;
batch_sum
+=
::
exp
(
to_hip_type
(
output_ptr
[
ind
]));
output
[
data_idx
]
=
input
[
data_idx
]
-
log_batch_sum
;
}
});
batch_sum
=
::
log
(
to_hip_type
(
batch_sum
));
for
(
std
::
size_t
j
=
0
;
j
<
n_dims
;
++
j
)
{
auto
ind
=
row_start
+
j
;
output_ptr
[
ind
]
-=
batch_sum
;
}
});
});
});
});
return
args
.
back
();
}
}
}
// namespace device
}
// namespace device
...
...
src/targets/gpu/device/mul_add.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/gpu/device/add_unary.hpp>
#include <migraphx/gpu/device/nary.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Elementwise fused multiply-add over three inputs:
//   result[i] = arg2[i] * arg1[i] + arg3[i]
// Broadcasting/layout handling is delegated to the generic nary launcher.
void mul_add(hipStream_t stream,
             const argument& result,
             const argument& arg1,
             const argument& arg2,
             const argument& arg3)
{
    auto fma_op = [](auto val, auto scale, auto bias) { return scale * val + bias; };
    nary(stream, result, arg1, arg2, arg3)(fma_op);
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/pad.cpp
View file @
20b1d690
...
@@ -4,6 +4,7 @@
...
@@ -4,6 +4,7 @@
#include <migraphx/gpu/device/pad.hpp>
#include <migraphx/gpu/device/pad.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/float_equal.hpp>
namespace
migraphx
{
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
inline
namespace
MIGRAPHX_INLINE_NS
{
...
@@ -14,24 +15,26 @@ argument
...
@@ -14,24 +15,26 @@ argument
pad
(
hipStream_t
stream
,
argument
result
,
argument
arg1
,
float
value
,
std
::
vector
<
std
::
int64_t
>
pads
)
pad
(
hipStream_t
stream
,
argument
result
,
argument
arg1
,
float
value
,
std
::
vector
<
std
::
int64_t
>
pads
)
{
{
std
::
size_t
nelements
=
arg1
.
get_shape
().
elements
();
std
::
size_t
nelements
=
arg1
.
get_shape
().
elements
();
hip_visit_all
(
result
,
arg1
)([
&
](
auto
output
,
auto
input
)
{
using
type
=
typename
decltype
(
output
)
::
value_type
;
using
hip_index
=
typename
decltype
(
output
)
::
hip_index
;
type
device_val
=
value
;
if
(
float_equal
(
value
,
std
::
numeric_limits
<
float
>::
lowest
()))
{
device_val
=
device_cast
(
std
::
numeric_limits
<
type
>::
lowest
());
}
gs_launch
(
stream
,
result
.
get_shape
().
elements
())([
=
](
auto
i
)
{
output
.
data
()[
i
]
=
device_val
;
});
nary
(
stream
,
result
)([
=
]
{
return
value
;
});
hip_index
offsets
;
visit_all
(
result
,
arg1
)([
&
](
auto
output
,
auto
input
)
{
std
::
copy
(
pads
.
begin
(),
pads
.
begin
()
+
offsets
.
size
(),
offsets
.
begin
());
visit_tensor_size
(
result
.
get_shape
().
lens
().
size
(),
[
&
](
auto
ndim
)
{
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
std
::
size_t
offsets
[
ndim
];
auto
idx
=
input
.
get_shape
().
multi
(
i
);
std
::
copy
(
pads
.
begin
(),
pads
.
begin
()
+
ndim
,
offsets
);
for
(
std
::
size_t
j
=
0
;
j
<
offsets
.
size
();
j
++
)
auto
*
outptr
=
output
.
data
();
{
const
auto
*
inptr
=
input
.
data
();
idx
[
j
]
+=
offsets
[
j
];
hip_tensor_descriptor
<
ndim
>
desc_input
(
input
.
get_shape
());
}
hip_tensor_descriptor
<
ndim
>
desc_output
(
output
.
get_shape
());
output
[
idx
]
=
input
.
data
()[
i
];
gs_launch
(
stream
,
nelements
)([
=
](
auto
i
)
{
auto
idx
=
desc_input
.
multi
(
i
);
for
(
std
::
size_t
j
=
0
;
j
<
ndim
;
j
++
)
{
idx
[
j
]
+=
offsets
[
j
];
}
outptr
[
desc_output
.
linear
(
idx
)]
=
inptr
[
i
];
});
});
});
});
});
return
result
;
return
result
;
...
...
src/targets/gpu/device/pow.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/gpu/device/pow.hpp>
#include <migraphx/gpu/device/nary.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Elementwise power: result[i] = arg1[i] raised to arg2[i].
// to_hip_type widens gpu_half to float so ::pow gets a supported overload.
void pow(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
    auto pow_op = [](auto base, auto expo) {
        return ::pow(to_hip_type(base), to_hip_type(expo));
    };
    nary(stream, result, arg1, arg2)(pow_op);
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/reduce_mean.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/gpu/device/reduce_mean.hpp>
#include <migraphx/gpu/device/reduce.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Mean reduction: sum the reduced elements, then divide by how many input
// elements fold into each output element.
void reduce_mean(hipStream_t stream, const argument& result, const argument& arg)
{
    const std::size_t elements_per_output =
        arg.get_shape().elements() / result.get_shape().elements();
    // sum{} accumulates (starting at 0), id{} reads inputs unchanged, and
    // mean{n} post-divides each accumulated value by n.
    reduce(stream, result, arg, sum{}, 0, id{}, mean{elements_per_output});
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/reduce_sum.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/gpu/device/reduce_sum.hpp>
#include <migraphx/gpu/device/reduce.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Sum reduction over the axes implied by the result shape: sum{} is the
// accumulation op (initial value 0); both the input transform and the final
// transform are the identity.
void reduce_sum(hipStream_t stream, const argument& result, const argument& arg)
{
    reduce(stream, result, arg, sum{}, 0, id{}, id{});
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/relu.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/gpu/device/relu.hpp>
#include <migraphx/gpu/device/nary.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Elementwise rectified linear unit: result[i] = max(0, arg[i]).
// std::max is used (rather than a comparison ternary) so NaN handling
// matches std::max's semantics.
void relu(hipStream_t stream, const argument& result, const argument& arg)
{
    auto relu_op = [](auto v) { return std::max<decltype(v)>(0, v); };
    nary(stream, result, arg)(relu_op);
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
src/targets/gpu/device/round.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/gpu/device/round.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Elementwise rounding using the device ::round overload.
void round(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto v) {
        // to_hip_type maps wrapper types to native HIP types so the
        // correct ::round overload is found.
        return ::round(to_hip_type(v));
    });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/rsqrt.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/gpu/device/rsqrt.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Elementwise reciprocal square root via the device ::rsqrt intrinsic.
void rsqrt(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto v) __device__ {
        return ::rsqrt(to_hip_type(v));
    });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/sigmoid.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/gpu/device/sigmoid.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Elementwise logistic sigmoid: result[i] = 1 / (1 + exp(-arg[i])).
void sigmoid(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) {
        const auto e = ::exp(to_hip_type(-x));
        return 1.f / (1.f + e);
    });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/sign.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/gpu/device/sign.hpp>
#include <migraphx/gpu/device/nary.hpp>
#include <migraphx/gpu/device/types.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Elementwise sign: -1 for negatives, +1 for positives, 0 otherwise
// (zero — and NaN, for which both comparisons are false — yield 0).
void sign(hipStream_t stream, const argument& result, const argument& arg)
{
    nary(stream, result, arg)([](auto x) {
        // Reordered but equivalent ternary chain.
        return (x < 0) ? -1 : ((x > 0) ? 1 : 0);
    });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/softmax.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/shape.hpp>
#include <migraphx/argument.hpp>
#include <migraphx/dfor.hpp>
#include <migraphx/gpu/device/softmax.hpp>
#include <migraphx/gpu/device/reduce.hpp>
#include <migraphx/gpu/device/tensor.hpp>
#include <migraphx/gpu/device/launch.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/hip.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Numerically stable softmax along `axis`:
//   softmax(x)_j = exp(x_j - max_k x_k) / sum_k exp(x_k - max_k x_k)
// One thread block cooperates on each "slice" (the set of elements that
// differ only in the `axis` coordinate), using three passes: max, sum of
// exponentials, then the normalized write-out.
void softmax(hipStream_t stream, const argument& result, const argument& arg, int axis)
{
    auto lens = result.get_shape().lens();
    // batch_lens is the output shape with the reduced axis collapsed to 1;
    // each element of batch_shape identifies one softmax slice.
    auto batch_lens                 = lens;
    std::size_t batch_item_num      = lens[axis]; // elements per slice
    batch_lens[axis]                = 1;
    migraphx::shape batch_shape{result.get_shape().type(), batch_lens};
    hip_visit_all(result, arg, batch_shape)([&](auto output, auto input, auto batch) {
        const std::size_t max_block_size = 256;
        // Shrink the block when a slice has fewer than max_block_size elements.
        const std::size_t block_size = compute_block_size(batch_item_num, max_block_size);
        // One block per slice: global index / block_size selects the slice.
        gs_launch(stream, batch_shape.elements() * block_size, block_size)(
            [=](auto i, auto idx) __device__ {
                // Multi-dimensional index of this thread's slice; the axis
                // coordinate is overwritten below as the slice is walked.
                auto data_idx = batch.multi(i / block_size);
                using type =
                    device_type<std::remove_cv_t<typename decltype(input)::value_type>>;
                // Pass 1: per-slice maximum (init = lowest representable value).
                type init = lowest();
                auto batch_max =
                    block_reduce<max_block_size>(idx, max{}, init, batch_item_num, [&](auto j) __device__ {
                        data_idx[axis] = j;
                        return input[data_idx];
                    });
                // Pass 2: sum of exp(x - max) across the slice.
                auto batch_sum =
                    block_reduce<max_block_size>(idx, sum{}, 0, batch_item_num, [&](auto j) __device__ {
                        data_idx[axis] = j;
                        auto val       = input[data_idx] - batch_max;
                        return ::exp(to_hip_type(val));
                    });
                // Pass 3: write the normalized probabilities.
                idx.local_stride(batch_item_num, [&](auto j) {
                    data_idx[axis]   = j;
                    auto val         = input[data_idx] - batch_max;
                    output[data_idx] = ::exp(to_hip_type(val)) / batch_sum;
                });
            });
    });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
src/targets/gpu/device/sqdiff.cpp
0 → 100644
View file @
20b1d690
#include <migraphx/gpu/device/sqdiff.hpp>
#include <migraphx/gpu/device/nary.hpp>
namespace
migraphx
{
inline
namespace
MIGRAPHX_INLINE_NS
{
namespace
gpu
{
namespace
device
{
// Elementwise squared difference: result[i] = (arg1[i] - arg2[i])^2.
void sqdiff(hipStream_t stream, const argument& result, const argument& arg1, const argument& arg2)
{
    nary(stream, result, arg1, arg2)([](auto x, auto y) {
        const auto diff = x - y;
        return diff * diff;
    });
}
}
// namespace device
}
// namespace gpu
}
// namespace MIGRAPHX_INLINE_NS
}
// namespace migraphx
Prev
1
2
3
4
5
6
7
8
9
10
…
15
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment