Commit 3855c6af authored by Shucai Xiao

Merge branch 'opt_log_softmax_new_device_code' into argmax_min

parents b222af2f 93eae2df
@@ -10,10 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-argument gather(hipStream_t stream,
-                const migraphx::shape& output_shape,
-                std::vector<migraphx::argument> args,
-                int axis);
+argument gather(hipStream_t stream, argument result, argument arg1, argument arg2, int axis);
 } // namespace device
 } // namespace gpu
...
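These header hunks (gather here, logsoftmax and softmax below) all make the same change: the device entry points now take the preallocated result buffer and the input tensors as explicit argument parameters instead of an output shape plus a std::vector<migraphx::argument>. gather keeps its argument return type, while logsoftmax and softmax switch to void since the caller already owns the result. As a hedged sketch, the matching call site for the new gather signature likely mirrors the softmax/logsoftmax compute changes shown later in this commit; hip_gather and the (data, indices) order of arg1/arg2 are assumptions, not part of this hunk:

// Hypothetical call site, mirroring the compute changes further down in this
// commit; not itself part of this diff.
argument hip_gather::compute(context& ctx, const shape&, const std::vector<argument>& args) const
{
    // Assumed order: args[0] is the data tensor, args[1] the index tensor,
    // and args.back() the preallocated output allocation.
    return device::gather(ctx.get_stream().get(), args.back(), args.at(0), args.at(1), op.axis);
}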
@@ -10,10 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-argument logsoftmax(hipStream_t stream,
-                    const migraphx::shape& output_shape,
-                    std::vector<migraphx::argument> args,
-                    int axis);
+void logsoftmax(hipStream_t stream, argument result, argument arg, int axis);
 } // namespace device
 } // namespace gpu
...
#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_OPERS_HPP
#define MIGRAPHX_GUARD_RTGLIB_DEVICE_REDUCE_OPERS_HPP

#include <migraphx/gpu/hip.hpp>
#include <migraphx/gpu/device/types.hpp>
#include <migraphx/gpu/device/nary.hpp>

namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
namespace device {

// Block-level tree reduction of the item_num values (item_num <= block_size)
// held at the front of data_ptr. Each pass folds the upper half of the live
// values into the lower half; thread 0 then merges the chunk result in
// data_ptr[0] into the running maximum kept one slot past the data, at
// data_ptr[block_size]. Every thread of the block must call this function,
// since it synchronizes internally.
template <class T>
inline __device__ void reduce_max(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num)
{
    while(true)
    {
        // Ceil/floor split: when item_num is odd, the middle element has no
        // partner this pass and survives into the next one.
        auto stride = (item_num + 1) / 2;
        auto size   = item_num / 2;
        for(size_t i = thr_idx; i < size; i += block_size)
        {
            data_ptr[i] = ::max(to_hip_type(data_ptr[i]), to_hip_type(data_ptr[i + stride]));
        }
        __syncthreads();

        item_num = stride;
        if(item_num == 1)
            break;
    }

    // Fold this chunk's maximum into the accumulator slot.
    if(thr_idx == 0)
    {
        data_ptr[block_size] =
            (data_ptr[0] < data_ptr[block_size]) ? data_ptr[block_size] : data_ptr[0];
    }
    __syncthreads();
}

// Same reduction pattern as reduce_max, accumulating a sum into
// data_ptr[block_size] instead of a maximum.
template <class T>
inline __device__ void reduce_sum(T* data_ptr, size_t block_size, size_t thr_idx, size_t item_num)
{
    while(true)
    {
        auto stride = (item_num + 1) / 2;
        auto size   = item_num / 2;
        for(size_t i = thr_idx; i < size; i += block_size)
        {
            data_ptr[i] += data_ptr[i + stride];
        }
        __syncthreads();

        item_num = stride;
        if(item_num == 1)
            break;
    }

    if(thr_idx == 0)
    {
        data_ptr[block_size] += data_ptr[0];
    }
    __syncthreads();
}

} // namespace device
} // namespace gpu
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx

#endif
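These helpers assume the caller stages data through shared memory in chunks of at most block_size elements and keeps a running accumulator in the extra slot. A minimal usage sketch follows, assuming a fixed block size of 256 and one block per reduced row; the kernel name and layout are illustrative, not code from this commit:

#include <migraphx/gpu/device/reduce_opers.hpp>

// Hypothetical kernel: computes the maximum of each length-n row of `in`,
// one thread block per row, writing one value per row to `out`.
template <class T>
__global__ void row_max(const T* in, T* out, size_t n)
{
    constexpr size_t block_size = 256;
    __shared__ T buffer[block_size + 1]; // last slot is the running maximum
    const size_t tid = threadIdx.x;
    const T* row     = in + blockIdx.x * n;

    if(tid == 0)
        buffer[block_size] = row[0]; // seed the accumulator (harmless for max)
    __syncthreads();

    // Walk the row in chunks of at most block_size elements; reduce_max
    // folds each chunk's maximum into buffer[block_size].
    for(size_t base = 0; base < n; base += block_size)
    {
        const size_t item_num = (n - base < block_size) ? (n - base) : block_size;
        if(tid < item_num)
            buffer[tid] = row[base + tid];
        __syncthreads();
        migraphx::gpu::device::reduce_max(buffer, block_size, tid, item_num);
    }

    if(tid == 0)
        out[blockIdx.x] = buffer[block_size];
}

A softmax or logsoftmax kernel would make two such passes over the reduced axis: reduce_max to find the shift that keeps the exponentials finite, then reduce_sum for the normalizer.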
@@ -10,10 +10,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {
-argument softmax(hipStream_t stream,
-                 const migraphx::shape& output_shape,
-                 std::vector<migraphx::argument> args,
-                 int axis);
+void softmax(hipStream_t stream, argument result, argument arg, int axis);
 } // namespace device
 } // namespace gpu
...
@@ -15,11 +15,11 @@ shape hip_logsoftmax::compute_shape(const std::vector<shape>& inputs) const
     return op.compute_shape({inputs.at(0)});
 }
-argument hip_logsoftmax::compute(context& ctx,
-                                 const shape& output_shape,
-                                 const std::vector<argument>& args) const
+argument
+hip_logsoftmax::compute(context& ctx, const shape&, const std::vector<argument>& args) const
 {
-    return device::logsoftmax(ctx.get_stream().get(), output_shape, args, op.axis);
+    device::logsoftmax(ctx.get_stream().get(), args.back(), args.front(), op.axis);
+    return args.back();
 }
 } // namespace gpu
...
@@ -37,11 +37,10 @@ shape hip_softmax::compute_shape(const std::vector<shape>& inputs) const
     return op.compute_shape({inputs.at(0)});
 }
-argument hip_softmax::compute(context& ctx,
-                              const shape& output_shape,
-                              const std::vector<argument>& args) const
+argument hip_softmax::compute(context& ctx, const shape&, const std::vector<argument>& args) const
 {
-    return device::softmax(ctx.get_stream().get(), output_shape, args, op.axis);
+    device::softmax(ctx.get_stream().get(), args.back(), args.front(), op.axis);
+    return args.back();
 }
 } // namespace gpu
...
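The two compute changes show the intent of the new device signatures: in the MIGraphX GPU target the last element of args is the output allocation provided by the lowering pass, so compute now hands args.back() and args.front() to the device function and returns the same buffer, leaving the now-unused output shape parameter unnamed. That args.back() is the preallocated output follows the convention used throughout this target rather than anything stated in the diff itself.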
@@ -598,7 +598,7 @@ struct test_softmax : verify_program<test_softmax<Axis>>
     migraphx::program create_program() const
     {
         migraphx::program p;
-        migraphx::shape s{migraphx::shape::float_type, {3, 4, 5, 6}};
+        migraphx::shape s{migraphx::shape::float_type, {2080, 4, 1026, 6}};
         auto param = p.add_parameter("0", s);
         p.add_instruction(migraphx::op::softmax{Axis}, param);
@@ -3350,7 +3350,7 @@ struct test_logsoftmax : verify_program<test_logsoftmax<Axis>>
     migraphx::program create_program() const
    {
         migraphx::program p;
-        migraphx::shape s{migraphx::shape::float_type, {3, 4, 5, 6}};
+        migraphx::shape s{migraphx::shape::float_type, {1025, 4, 1025, 6}};
         auto param = p.add_parameter("0", s);
         p.add_instruction(migraphx::op::logsoftmax{Axis}, param);
...
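The verify-test shapes grow from {3, 4, 5, 6} to {2080, 4, 1026, 6} and {1025, 4, 1025, 6}, presumably chosen so the reduced axis is longer than a single thread block and includes odd, non-power-of-two extents such as 1025, exercising the chunked accumulator path in reduce_max and reduce_sum above.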