Commit f155b0e6 authored by Umang Yadav's avatar Umang Yadav
Browse files

merge changes

parent c4cee345
...@@ -21,7 +21,6 @@ ...@@ -21,7 +21,6 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE. * THE SOFTWARE.
*/ */
#include "migraphx/serialize.hpp"
#include <iterator> #include <iterator>
#include <utility> #include <utility>
#include <migraphx/eliminate_fp8.hpp> #include <migraphx/eliminate_fp8.hpp>
...@@ -30,6 +29,7 @@ ...@@ -30,6 +29,7 @@
#include <migraphx/instruction.hpp> #include <migraphx/instruction.hpp>
#include <migraphx/iterator_for.hpp> #include <migraphx/iterator_for.hpp>
#include <migraphx/stringutils.hpp> #include <migraphx/stringutils.hpp>
#include <migraphx/serialize.hpp>
#include <migraphx/ranges.hpp> #include <migraphx/ranges.hpp>
namespace migraphx { namespace migraphx {
......
...@@ -22,7 +22,6 @@ ...@@ -22,7 +22,6 @@
#ifndef MIGRAPHX_GUARD_KERNELS_BITCAST_HPP #ifndef MIGRAPHX_GUARD_KERNELS_BITCAST_HPP
#define MIGRAPHX_GUARD_KERNELS_BITCAST_HPP #define MIGRAPHX_GUARD_KERNELS_BITCAST_HPP
#include <migraphx/kernels/type_traits.hpp> #include <migraphx/kernels/type_traits.hpp>
namespace migraphx { namespace migraphx {
......
...@@ -394,7 +394,6 @@ using fp8e5m2fnuz = float8<migraphx::fp8::f8_type::bf8, true>; ...@@ -394,7 +394,6 @@ using fp8e5m2fnuz = float8<migraphx::fp8::f8_type::bf8, true>;
} }
// NOLINTNEXTLINE // NOLINTNEXTLINE
#define MIGRAPHX_FP8_OTHER_OPS(T) \ #define MIGRAPHX_FP8_OTHER_OPS(T) \
inline constexpr __device__ T fabs(T v) \ inline constexpr __device__ T fabs(T v) \
{ \ { \
...@@ -502,7 +501,6 @@ class numeric_limits<fp8e5m2fnuz> ...@@ -502,7 +501,6 @@ class numeric_limits<fp8e5m2fnuz>
{ {
return fp8e5m2fnuz(0x7F, fp8e5m2fnuz::from_bits()); return fp8e5m2fnuz(0x7F, fp8e5m2fnuz::from_bits());
} }
// this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01. I am not sure if we // this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01. I am not sure if we
// want to make this distinction. For the floating points we would end up using lowest most of // want to make this distinction. For the floating points we would end up using lowest most of
// the times. // the times.
...@@ -530,7 +528,9 @@ class numeric_limits<fp8e5m2> ...@@ -530,7 +528,9 @@ class numeric_limits<fp8e5m2>
} }
static constexpr __device__ fp8e5m2 max() { return fp8e5m2(0x7B, fp8e5m2::from_bits()); } static constexpr __device__ fp8e5m2 max() { return fp8e5m2(0x7B, fp8e5m2::from_bits()); }
// this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01. // this is min value that is not DeNormalized(DeNorm). DeNorm min is 0x01. I am not sure if we
// want to make this distinction. For the floating points we would end up using lowest most of
// the times.
static constexpr __device__ fp8e5m2 min() { return fp8e5m2(0x4, fp8e5m2::from_bits()); } static constexpr __device__ fp8e5m2 min() { return fp8e5m2(0x4, fp8e5m2::from_bits()); }
static constexpr __device__ fp8e5m2 lowest() { return fp8e5m2(0xFB, fp8e5m2::from_bits()); } static constexpr __device__ fp8e5m2 lowest() { return fp8e5m2(0xFB, fp8e5m2::from_bits()); }
...@@ -539,7 +539,6 @@ class numeric_limits<fp8e5m2> ...@@ -539,7 +539,6 @@ class numeric_limits<fp8e5m2>
}; };
} // namespace fp8 } // namespace fp8
template <class T, template <class T,
MIGRAPHX_REQUIRES(is_same<T, fp8::fp8e4m3fnuz>{} or is_same<T, fp8::fp8e5m2fnuz>{} or MIGRAPHX_REQUIRES(is_same<T, fp8::fp8e4m3fnuz>{} or is_same<T, fp8::fp8e5m2fnuz>{} or
is_same<T, fp8::fp8e4m3fn>{} or is_same<T, fp8::fp8e5m2>{})> is_same<T, fp8::fp8e4m3fn>{} or is_same<T, fp8::fp8e5m2>{})>
...@@ -560,7 +559,6 @@ constexpr T numeric_lowest(migraphx::fp8::f8_type unused = migraphx::fp8::f8_typ ...@@ -560,7 +559,6 @@ constexpr T numeric_lowest(migraphx::fp8::f8_type unused = migraphx::fp8::f8_typ
(void)(unused); (void)(unused);
return fp8::numeric_limits<T>::lowest(); return fp8::numeric_limits<T>::lowest();
} }
} // namespace migraphx } // namespace migraphx
// ================================================================================================= // =================================================================================================
#if defined(__clang__) #if defined(__clang__)
......
...@@ -54,7 +54,6 @@ __device__ void generic_binary_layernorm( ...@@ -54,7 +54,6 @@ __device__ void generic_binary_layernorm(
using value_type = typename Input1::type; using value_type = typename Input1::type;
using vec_value_type = vec_type<value_type>; using vec_value_type = vec_type<value_type>;
constexpr auto relements = r.template elements<Input1>(); constexpr auto relements = r.template elements<Input1>();
constexpr auto relements_r = vec_value_type{1.0 / relements}; constexpr auto relements_r = vec_value_type{1.0 / relements};
auto relements_rsqrt = sqrt(relements_r); auto relements_rsqrt = sqrt(relements_r);
......
...@@ -118,7 +118,7 @@ struct highest ...@@ -118,7 +118,7 @@ struct highest
template <class T> template <class T>
constexpr operator T() const constexpr operator T() const
{ {
return numeric_max<vec_type<T>, void>(); return numeric_max<vec_type<T>>();
} }
}; };
} // namespace migraphx } // namespace migraphx
......
...@@ -40,7 +40,6 @@ __device__ void pad(const index& idx, ...@@ -40,7 +40,6 @@ __device__ void pad(const index& idx,
const PadVal& pad_val) const PadVal& pad_val)
{ {
auto output_shape = output.get_shape(); auto output_shape = output.get_shape();
using otype = typename Output::type;
idx.global_stride(output_shape.elements(), [&](auto i) { idx.global_stride(output_shape.elements(), [&](auto i) {
// 1. get current multi-index for output // 1. get current multi-index for output
// 2. get the size of the input to determine input boundaries // 2. get the size of the input to determine input boundaries
......
...@@ -56,7 +56,7 @@ struct avg_pool ...@@ -56,7 +56,7 @@ struct avg_pool
template <class T> template <class T>
MIGRAPHX_DEVICE_CONSTEXPR T operator()(T x, T y) MIGRAPHX_DEVICE_CONSTEXPR T operator()(T x, T y)
{ {
return static_cast<T>(x + y); return x + y;
} }
template <class T> template <class T>
...@@ -70,7 +70,6 @@ template <class Iterator, class Op> ...@@ -70,7 +70,6 @@ template <class Iterator, class Op>
MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate( MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate(
const Iterator data, const array<index_int, 2>& dims, array<float, 2> xy, Op pooling) const Iterator data, const array<index_int, 2>& dims, array<float, 2> xy, Op pooling)
{ {
using ret_type = typename Iterator::value_type;
array<int, 2> low{}; array<int, 2> low{};
array<int, 2> high{}; array<int, 2> high{};
for(index_int ii = 0; ii < xy.size(); ++ii) for(index_int ii = 0; ii < xy.size(); ++ii)
...@@ -93,7 +92,6 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate( ...@@ -93,7 +92,6 @@ MIGRAPHX_DEVICE_CONSTEXPR typename Iterator::value_type bilinear_interpolate(
high[0] * dims[1] + low[1], high[0] * dims[1] + low[1],
high[0] * dims[1] + high[1]}; high[0] * dims[1] + high[1]};
float ly = xy[0] - low[0]; float ly = xy[0] - low[0];
float lx = xy[1] - low[1]; float lx = xy[1] - low[1];
float hy = 1.0f - ly; float hy = 1.0f - ly;
...@@ -204,25 +202,25 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t, ...@@ -204,25 +202,25 @@ __device__ void roialign(const T& x_t, const U& rois_t, const V& ind_t, W& y_t,
const auto offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]); const auto offset_x = x + ((batch_ind * channel_num + c) * in_dims[0] * in_dims[1]);
if constexpr(s.is_avg_pooling) if constexpr(s.is_avg_pooling)
{ {
y_t[i] = static_cast<ytype>(calc_pooling(offset_x, y_t[i] = calc_pooling(offset_x,
roi_starts, roi_starts,
bin_size, bin_size,
{ph, pw}, {ph, pw},
bin_grid_size, bin_grid_size,
in_dims, in_dims,
s.roi_offset, s.roi_offset,
avg_pool{})); avg_pool{});
} }
else else
{ {
y_t[i] = static_cast<ytype>(calc_pooling(offset_x, y_t[i] = calc_pooling(offset_x,
roi_starts, roi_starts,
bin_size, bin_size,
{ph, pw}, {ph, pw},
bin_grid_size, bin_grid_size,
in_dims, in_dims,
s.roi_offset, s.roi_offset,
max_pool{})); max_pool{});
} }
} }
} }
......
...@@ -33,7 +33,6 @@ template <index_int Axis, class Input, class Output> ...@@ -33,7 +33,6 @@ template <index_int Axis, class Input, class Output>
__device__ void softmax(Input input1, Output output) __device__ void softmax(Input input1, Output output)
{ {
using block = reduce::auto_block<reduce::reduce_elements_with_axis<Input, Axis>()>; using block = reduce::auto_block<reduce::reduce_elements_with_axis<Input, Axis>()>;
using otype = typename Output::type;
block::template run<reduce::with_axis<Input, Axis>>([&](auto, auto r) { block::template run<reduce::with_axis<Input, Axis>>([&](auto, auto r) {
auto input = r.inner(op::id{})(input1); auto input = r.inner(op::id{})(input1);
#ifdef MIGRAPHX_USE_FAST_SOFTMAX #ifdef MIGRAPHX_USE_FAST_SOFTMAX
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment