Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
MIGraphX
Commits
f155b0e6
Commit
f155b0e6
authored
Dec 01, 2023
by
Umang Yadav
Browse files
merge changes
parent
c4cee345
Changes
8
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
22 additions
and
30 deletions
+22
-30
src/eliminate_fp8.cpp
src/eliminate_fp8.cpp
+1
-1
src/targets/gpu/kernels/include/migraphx/kernels/bit_cast.hpp
...targets/gpu/kernels/include/migraphx/kernels/bit_cast.hpp
+0
-1
src/targets/gpu/kernels/include/migraphx/kernels/float8.hpp
src/targets/gpu/kernels/include/migraphx/kernels/float8.hpp
+3
-5
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
...argets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
+0
-1
src/targets/gpu/kernels/include/migraphx/kernels/ops.hpp
src/targets/gpu/kernels/include/migraphx/kernels/ops.hpp
+1
-1
src/targets/gpu/kernels/include/migraphx/kernels/pad.hpp
src/targets/gpu/kernels/include/migraphx/kernels/pad.hpp
+0
-1
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp
...targets/gpu/kernels/include/migraphx/kernels/roialign.hpp
+17
-19
src/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp
src/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp
+0
-1
No files found.
src/eliminate_fp8.cpp
View file @
f155b0e6
...
@@ -21,7 +21,6 @@
...
@@ -21,7 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
* THE SOFTWARE.
*/
*/
#include "migraphx/serialize.hpp"
#include <iterator>
#include <iterator>
#include <utility>
#include <utility>
#include <migraphx/eliminate_fp8.hpp>
#include <migraphx/eliminate_fp8.hpp>
...
@@ -30,6 +29,7 @@
...
@@ -30,6 +29,7 @@
#include <migraphx/instruction.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/iterator_for.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/stringutils.hpp>
#include <migraphx/serialize.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/ranges.hpp>
namespace
migraphx
{
namespace
migraphx
{
...
...
src/targets/gpu/kernels/include/migraphx/kernels/bit_cast.hpp
View file @
f155b0e6
...
@@ -22,7 +22,6 @@
...
@@ -22,7 +22,6 @@
#ifndef MIGRAPHX_GUARD_KERNELS_BITCAST_HPP
#ifndef MIGRAPHX_GUARD_KERNELS_BITCAST_HPP
#define MIGRAPHX_GUARD_KERNELS_BITCAST_HPP
#define MIGRAPHX_GUARD_KERNELS_BITCAST_HPP
#include <migraphx/kernels/type_traits.hpp>
#include <migraphx/kernels/type_traits.hpp>
namespace
migraphx
{
namespace
migraphx
{
...
...
src/targets/gpu/kernels/include/migraphx/kernels/float8.hpp
View file @
f155b0e6
...
@@ -394,7 +394,6 @@ using fp8e5m2fnuz = float8<migraphx::fp8::f8_type::bf8, true>;
...
@@ -394,7 +394,6 @@ using fp8e5m2fnuz = float8<migraphx::fp8::f8_type::bf8, true>;
}
}
// NOLINTNEXTLINE
// NOLINTNEXTLINE
#define MIGRAPHX_FP8_OTHER_OPS(T) \
#define MIGRAPHX_FP8_OTHER_OPS(T) \
inline constexpr __device__ T fabs(T v) \
inline constexpr __device__ T fabs(T v) \
{ \
{ \
...
@@ -502,7 +501,6 @@ class numeric_limits<fp8e5m2fnuz>
...
@@ -502,7 +501,6 @@ class numeric_limits<fp8e5m2fnuz>
{
{
return
fp8e5m2fnuz
(
0x7F
,
fp8e5m2fnuz
::
from_bits
());
return
fp8e5m2fnuz
(
0x7F
,
fp8e5m2fnuz
::
from_bits
());
}
}
// this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01. I am not sure if we
// this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01. I am not sure if we
// want to make this distinction. For the floating points we would end up using lowest most of
// want to make this distinction. For the floating points we would end up using lowest most of
// the times.
// the times.
...
@@ -530,7 +528,9 @@ class numeric_limits<fp8e5m2>
...
@@ -530,7 +528,9 @@ class numeric_limits<fp8e5m2>
}
}
static
constexpr
__device__
fp8e5m2
max
()
{
return
fp8e5m2
(
0x7B
,
fp8e5m2
::
from_bits
());
}
static
constexpr
__device__
fp8e5m2
max
()
{
return
fp8e5m2
(
0x7B
,
fp8e5m2
::
from_bits
());
}
// this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01.
// this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01. I am not sure if we
// want to make this distinction. For the floating points we would end up using lowest most of
// the times.
static
constexpr
__device__
fp8e5m2
min
()
{
return
fp8e5m2
(
0x4
,
fp8e5m2
::
from_bits
());
}
static
constexpr
__device__
fp8e5m2
min
()
{
return
fp8e5m2
(
0x4
,
fp8e5m2
::
from_bits
());
}
static
constexpr
__device__
fp8e5m2
lowest
()
{
return
fp8e5m2
(
0xFB
,
fp8e5m2
::
from_bits
());
}
static
constexpr
__device__
fp8e5m2
lowest
()
{
return
fp8e5m2
(
0xFB
,
fp8e5m2
::
from_bits
());
}
...
@@ -539,7 +539,6 @@ class numeric_limits<fp8e5m2>
...
@@ -539,7 +539,6 @@ class numeric_limits<fp8e5m2>
};
};
}
// namespace fp8
}
// namespace fp8
template
<
class
T
,
template
<
class
T
,
MIGRAPHX_REQUIRES
(
is_same
<
T
,
fp8
::
fp8e4m3fnuz
>{}
or
is_same
<
T
,
fp8
::
fp8e5m2fnuz
>
{}
or
MIGRAPHX_REQUIRES
(
is_same
<
T
,
fp8
::
fp8e4m3fnuz
>{}
or
is_same
<
T
,
fp8
::
fp8e5m2fnuz
>
{}
or
is_same
<
T
,
fp8
::
fp8e4m3fn
>
{}
or
is_same
<
T
,
fp8
::
fp8e5m2
>
{})
>
is_same
<
T
,
fp8
::
fp8e4m3fn
>
{}
or
is_same
<
T
,
fp8
::
fp8e5m2
>
{})
>
...
@@ -560,7 +559,6 @@ constexpr T numeric_lowest(migraphx::fp8::f8_type unused = migraphx::fp8::f8_typ
...
@@ -560,7 +559,6 @@ constexpr T numeric_lowest(migraphx::fp8::f8_type unused = migraphx::fp8::f8_typ
(
void
)(
unused
);
(
void
)(
unused
);
return
fp8
::
numeric_limits
<
T
>::
lowest
();
return
fp8
::
numeric_limits
<
T
>::
lowest
();
}
}
}
// namespace migraphx
}
// namespace migraphx
// =================================================================================================
// =================================================================================================
#if defined(__clang__)
#if defined(__clang__)
...
...
src/targets/gpu/kernels/include/migraphx/kernels/layernorm.hpp
View file @
f155b0e6
...
@@ -54,7 +54,6 @@ __device__ void generic_binary_layernorm(
...
@@ -54,7 +54,6 @@ __device__ void generic_binary_layernorm(
using
value_type
=
typename
Input1
::
type
;
using
value_type
=
typename
Input1
::
type
;
using
vec_value_type
=
vec_type
<
value_type
>
;
using
vec_value_type
=
vec_type
<
value_type
>
;
constexpr
auto
relements
=
r
.
template
elements
<
Input1
>();
constexpr
auto
relements
=
r
.
template
elements
<
Input1
>();
constexpr
auto
relements_r
=
vec_value_type
{
1.0
/
relements
};
constexpr
auto
relements_r
=
vec_value_type
{
1.0
/
relements
};
auto
relements_rsqrt
=
sqrt
(
relements_r
);
auto
relements_rsqrt
=
sqrt
(
relements_r
);
...
...
src/targets/gpu/kernels/include/migraphx/kernels/ops.hpp
View file @
f155b0e6
...
@@ -118,7 +118,7 @@ struct highest
...
@@ -118,7 +118,7 @@ struct highest
template
<
class
T
>
template
<
class
T
>
constexpr
operator
T
()
const
constexpr
operator
T
()
const
{
{
return
numeric_max
<
vec_type
<
T
>
,
void
>
();
return
numeric_max
<
vec_type
<
T
>>
();
}
}
};
};
}
// namespace migraphx
}
// namespace migraphx
...
...
src/targets/gpu/kernels/include/migraphx/kernels/pad.hpp
View file @
f155b0e6
...
@@ -40,7 +40,6 @@ __device__ void pad(const index& idx,
...
@@ -40,7 +40,6 @@ __device__ void pad(const index& idx,
const
PadVal
&
pad_val
)
const
PadVal
&
pad_val
)
{
{
auto
output_shape
=
output
.
get_shape
();
auto
output_shape
=
output
.
get_shape
();
using
otype
=
typename
Output
::
type
;
idx
.
global_stride
(
output_shape
.
elements
(),
[
&
](
auto
i
)
{
idx
.
global_stride
(
output_shape
.
elements
(),
[
&
](
auto
i
)
{
// 1. get current multi-index for output
// 1. get current multi-index for output
// 2. get the size of the input to determine input boundaries
// 2. get the size of the input to determine input boundaries
...
...
src/targets/gpu/kernels/include/migraphx/kernels/roialign.hpp
View file @
f155b0e6
...
@@ -56,7 +56,7 @@ struct avg_pool
...
@@ -56,7 +56,7 @@ struct avg_pool
template
<
class
T
>
template
<
class
T
>
MIGRAPHX_DEVICE_CONSTEXPR
T
operator
()(
T
x
,
T
y
)
MIGRAPHX_DEVICE_CONSTEXPR
T
operator
()(
T
x
,
T
y
)
{
{
return
static_cast
<
T
>
(
x
+
y
)
;
return
x
+
y
;
}
}
template
<
class
T
>
template
<
class
T
>
...
@@ -70,7 +70,6 @@ template <class Iterator, class Op>
...
@@ -70,7 +70,6 @@ template <class Iterator, class Op>
MIGRAPHX_DEVICE_CONSTEXPR
typename
Iterator
::
value_type
bilinear_interpolate
(
MIGRAPHX_DEVICE_CONSTEXPR
typename
Iterator
::
value_type
bilinear_interpolate
(
const
Iterator
data
,
const
array
<
index_int
,
2
>&
dims
,
array
<
float
,
2
>
xy
,
Op
pooling
)
const
Iterator
data
,
const
array
<
index_int
,
2
>&
dims
,
array
<
float
,
2
>
xy
,
Op
pooling
)
{
{
using
ret_type
=
typename
Iterator
::
value_type
;
array
<
int
,
2
>
low
{};
array
<
int
,
2
>
low
{};
array
<
int
,
2
>
high
{};
array
<
int
,
2
>
high
{};
for
(
index_int
ii
=
0
;
ii
<
xy
.
size
();
++
ii
)
for
(
index_int
ii
=
0
;
ii
<
xy
.
size
();
++
ii
)
...
@@ -93,7 +92,6 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate(
...
@@ -93,7 +92,6 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate(
high
[
0
]
*
dims
[
1
]
+
low
[
1
],
high
[
0
]
*
dims
[
1
]
+
low
[
1
],
high
[
0
]
*
dims
[
1
]
+
high
[
1
]};
high
[
0
]
*
dims
[
1
]
+
high
[
1
]};
float
ly
=
xy
[
0
]
-
low
[
0
];
float
ly
=
xy
[
0
]
-
low
[
0
];
float
lx
=
xy
[
1
]
-
low
[
1
];
float
lx
=
xy
[
1
]
-
low
[
1
];
float
hy
=
1.0
f
-
ly
;
float
hy
=
1.0
f
-
ly
;
...
@@ -204,25 +202,25 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t,
...
@@ -204,25 +202,25 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t,
const
auto
offset_x
=
x
+
((
batch_ind
*
channel_num
+
c
)
*
in_dims
[
0
]
*
in_dims
[
1
]);
const
auto
offset_x
=
x
+
((
batch_ind
*
channel_num
+
c
)
*
in_dims
[
0
]
*
in_dims
[
1
]);
if
constexpr
(
s
.
is_avg_pooling
)
if
constexpr
(
s
.
is_avg_pooling
)
{
{
y_t
[
i
]
=
static_cast
<
ytype
>
(
calc_pooling
(
offset_x
,
y_t
[
i
]
=
calc_pooling
(
offset_x
,
roi_starts
,
roi_starts
,
bin_size
,
bin_size
,
{
ph
,
pw
},
{
ph
,
pw
},
bin_grid_size
,
bin_grid_size
,
in_dims
,
in_dims
,
s
.
roi_offset
,
s
.
roi_offset
,
avg_pool
{})
)
;
avg_pool
{});
}
}
else
else
{
{
y_t
[
i
]
=
static_cast
<
ytype
>
(
calc_pooling
(
offset_x
,
y_t
[
i
]
=
calc_pooling
(
offset_x
,
roi_starts
,
roi_starts
,
bin_size
,
bin_size
,
{
ph
,
pw
},
{
ph
,
pw
},
bin_grid_size
,
bin_grid_size
,
in_dims
,
in_dims
,
s
.
roi_offset
,
s
.
roi_offset
,
max_pool
{})
)
;
max_pool
{});
}
}
}
}
}
}
...
...
src/targets/gpu/kernels/include/migraphx/kernels/softmax.hpp
View file @
f155b0e6
...
@@ -33,7 +33,6 @@ template <index_int Axis, class Input, class Output>
...
@@ -33,7 +33,6 @@ template <index_int Axis, class Input, class Output>
__device__
void
softmax
(
Input
input1
,
Output
output
)
__device__
void
softmax
(
Input
input1
,
Output
output
)
{
{
using
block
=
reduce
::
auto_block
<
reduce
::
reduce_elements_with_axis
<
Input
,
Axis
>
()
>
;
using
block
=
reduce
::
auto_block
<
reduce
::
reduce_elements_with_axis
<
Input
,
Axis
>
()
>
;
using
otype
=
typename
Output
::
type
;
block
::
template
run
<
reduce
::
with_axis
<
Input
,
Axis
>
>
([
&
](
auto
,
auto
r
)
{
block
::
template
run
<
reduce
::
with_axis
<
Input
,
Axis
>
>
([
&
](
auto
,
auto
r
)
{
auto
input
=
r
.
inner
(
op
::
id
{})(
input1
);
auto
input
=
r
.
inner
(
op
::
id
{})(
input1
);
#ifdef MIGRAPHX_USE_FAST_SOFTMAX
#ifdef MIGRAPHX_USE_FAST_SOFTMAX
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment