Merge pull request #268 from ROCmSoftwarePlatform/logsoftmax_modification

Logsoftmax modification

Merge pull request #268 from ROCmSoftwarePlatform/logsoftmax_modification
Logsoftmax modification
0c798442 · mvermeulen · GitHub · 0d796941 · e8afe91a · 0c798442
Unverified commit 0c798442, authored by mvermeulen on Jun 03, 2019; committed by GitHub on Jun 03, 2019.
6 changed files
--- a/src/include/migraphx/op/logsoftmax.hpp
+++ b/src/include/migraphx/op/logsoftmax.hpp
@@ -30,7 +30,7 @@ struct logsoftmax
    shape compute_shape(std::vector<shape> inputs) const
    {
        check_shapes{inputs}.has(1).standard();
-        if(axis < 0 || axis > inputs[0].lens().size())
+        if(axis < 0 || axis >= inputs[0].lens().size())
        {
            MIGRAPHX_THROW("LogSoftMax: input axis value " + std::to_string(axis) +
                           " is out of range");

--- a/src/targets/cpu/lowering.cpp
+++ b/src/targets/cpu/lowering.cpp
@@ -569,33 +569,19 @@ struct cpu_logsoftmax
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }

    template <typename T>
-    std::size_t compute_batch_index(const T& idx, shape& batch_shape, int axis) const
+    std::size_t compute_batch_index(T idx, const shape& batch_shape, int axis) const
    {
-        if(axis == 0)
-        {
-            return 0;
-        }
-        else
-        {
-            std::vector<std::size_t> batch_idx(idx.begin(), idx.begin() + axis);
-            return batch_shape.index(batch_idx.begin(), batch_idx.end());
-        }
+        idx[axis] = 0;
+        return batch_shape.index(idx);
    }

    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
    {
        argument result{output_shape};
-        auto lens = output_shape.lens();
-        std::vector<std::size_t> batch_lens{};
-        if(op.axis == 0)
-        {
-            batch_lens.push_back(1);
-        }
-        else
-        {
-            batch_lens.insert(batch_lens.begin(), lens.begin(), lens.begin() + op.axis);
-        }
-        shape batch_shape{migraphx::shape::uint32_type, batch_lens};
+        auto batch_lens     = output_shape.lens();
+        batch_lens[op.axis] = 1;
+        shape batch_shape{shape::int32_type, batch_lens};
+
        visit_all(result, args[0])([&](auto output, auto input) {
            using value_type = typename decltype(input)::value_type;
            std::vector<value_type> batch_max(batch_shape.elements(),

--- a/src/targets/gpu/device/logsoftmax.cpp
+++ b/src/targets/gpu/device/logsoftmax.cpp
@@ -17,47 +17,56 @@ argument logsoftmax(hipStream_t stream,
                    int axis)
 {

-    auto lens              = output_shape.lens();
-    std::size_t batch_size = std::accumulate(
-        lens.begin(), lens.begin() + axis, std::size_t{1}, std::multiplies<std::size_t>());
-    std::size_t n_dims = std::accumulate(
-        lens.begin() + axis, lens.end(), std::size_t{1}, std::multiplies<std::size_t>());
-    migraphx::shape comp_shape{output_shape.type(), {batch_size, n_dims}};
+    auto lens         = output_shape.lens();
+    auto num_in_batch = lens[axis];
+    auto batch_lens   = lens;
+    batch_lens[axis]  = 1;
+    migraphx::shape batch_shape{output_shape.type(), batch_lens};

    visit_all(args.back(), args.front())([&](auto output, auto input) {
        const auto* input_ptr = device_cast(input.data());
        auto* output_ptr      = device_cast(output.data());
+        visit_tensor_size(batch_shape.lens().size(), [&](auto n_dim) {
+            hip_tensor_descriptor<n_dim> desc_batch(batch_shape);
+            hip_tensor_descriptor<n_dim> desc_data(output_shape);

-        // each thread is for one item in the batch
-        gs_launch(stream, batch_size)([=](auto i) {
-            std::size_t row_start = i * n_dims;
-            // get max
-            auto batch_max = input_ptr[row_start];
-            for(std::size_t j = 1; j < n_dims; ++j)
-            {
-                auto ind  = row_start + j;
-                batch_max = std::max(to_hip_type(batch_max), to_hip_type(input_ptr[ind]));
-            }
+            // each thread is for one item in the batch
+            gs_launch(stream, batch_shape.elements())([=](auto i) {
+                auto batch_idx = desc_batch.multi(i);
+                auto data_idx  = batch_idx;

-            for(std::size_t j = 0; j < n_dims; ++j)
-            {
-                auto ind        = row_start + j;
-                output_ptr[ind] = input_ptr[ind] - batch_max;
-            }
+                // get max
+                auto batch_max = input_ptr[desc_data.linear(batch_idx)];
+                for(std::size_t j = 1; j < num_in_batch; ++j)
+                {
+                    data_idx[axis] = j;
+                    size_t idx     = desc_data.linear(data_idx);
+                    batch_max      = std::max(to_hip_type(batch_max), to_hip_type(input_ptr[idx]));
+                }

-            auto batch_sum = ::exp(to_hip_type(output_ptr[row_start]));
-            for(std::size_t j = 1; j < n_dims; ++j)
-            {
-                auto ind = row_start + j;
-                batch_sum += ::exp(to_hip_type(output_ptr[ind]));
-            }
-            batch_sum = ::log(to_hip_type(batch_sum));
+                for(std::size_t j = 0; j < num_in_batch; ++j)
+                {
+                    data_idx[axis]  = j;
+                    size_t idx      = desc_data.linear(data_idx);
+                    output_ptr[idx] = input_ptr[idx] - batch_max;
+                }

-            for(std::size_t j = 0; j < n_dims; ++j)
-            {
-                auto ind = row_start + j;
-                output_ptr[ind] -= batch_sum;
-            }
+                auto batch_sum = ::exp(to_hip_type(output_ptr[desc_data.linear(batch_idx)]));
+                for(std::size_t j = 1; j < num_in_batch; ++j)
+                {
+                    data_idx[axis] = j;
+                    size_t idx     = desc_data.linear(data_idx);
+                    batch_sum += ::exp(to_hip_type(output_ptr[idx]));
+                }
+                batch_sum = ::log(to_hip_type(batch_sum));
+
+                for(std::size_t j = 0; j < num_in_batch; ++j)
+                {
+                    data_idx[axis] = j;
+                    size_t idx     = desc_data.linear(data_idx);
+                    output_ptr[idx] -= batch_sum;
+                }
+            });
        });
    });


--- a/test/cpu_ops_test.cpp
+++ b/test/cpu_ops_test.cpp
@@ -1002,14 +1002,13 @@ TEST_CASE(logsoftmax_test_axis_0)
        -0.99628491, 1.04314606,  -1.22943315, 0.76930403,  0.31106618};

    std::vector<float> s = {
-        -2.71138556, -5.85030702, -3.74063578, -4.22915517, -6.15821977, -5.96072346, -3.57208097,
-        -5.78313166, -5.51435497, -3.67224195, -3.88393048, -2.57061599, -5.54431083, -6.27880025,
-        -5.1878749,  -6.1318955,  -5.29178545, -4.22537886, -3.75693516, -7.07047099, -4.45763333,
-        -4.66281846, -6.18290503, -4.11886536, -6.17408292, -4.18030052, -4.64570814, -4.64354473,
-        -3.06629525, -3.80807681, -4.69162374, -5.53605222, -3.20969275, -4.82645674, -6.63942356,
-        -4.73634471, -3.86003866, -5.32738981, -4.22249802, -4.51258693, -2.41455206, -3.48343199,
-        -5.86215889, -4.93435935, -4.83713408, -2.97471885, -2.16666459, -3.69133151, -4.71640968,
-        -5.64652924, -3.60709827, -5.87967748, -3.8809403,  -4.33917815};
+        -0.135261, -2.843968, -0.659995, -0.488413, -1.051857, -2.812936, -0.250956, -0.353985,
+        -1.155980, -0.603651, -0.211969, -0.175371, -1.336552, -3.885010, -1.871544, -0.837083,
+        -0.887745, -0.433338, -1.158864, -4.911197, -1.147972, -0.666711, -0.996874, -0.981418,
+        -0.851145, -0.853988, -0.858112, -2.067420, -0.059956, -0.727436, -0.950881, -0.429689,
+        -0.061906, -1.505332, -1.210277, -0.377970, -0.791448, -1.655428, -1.827253, -0.304828,
+        -0.020762, -0.167101, -0.567346, -0.530319, -1.045094, -0.376648, -0.007391, -0.381670,
+        -0.720302, -0.460499, -0.469651, -0.556740, -0.554628, -0.551582};

    migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
    auto al  = p.add_literal(migraphx::literal{a_shape, a});
@@ -1036,14 +1035,13 @@ TEST_CASE(logsoftmax_test_axis_1)
        -0.99628491, 1.04314606,  -1.22943315, 0.76930403,  0.31106618};

    std::vector<float> s = {
-        -1.77931988, -4.91824134, -2.80857010, -3.29708949, -5.22615409, -5.02865778, -2.64001529,
-        -4.85106598, -4.58228929, -2.74017627, -2.95186480, -1.63855031, -4.61224515, -5.34673457,
-        -4.25580922, -5.19982982, -4.35971977, -3.29331318, -2.82486948, -6.13840531, -3.52556765,
-        -3.73075278, -5.25083935, -3.18679968, -5.24201724, -3.24823484, -3.71364246, -4.14309917,
-        -2.56584969, -3.30763125, -4.19117818, -5.03560666, -2.70924719, -4.32601118, -6.13897800,
-        -4.23589915, -3.35959310, -4.82694425, -3.72205246, -4.01214137, -1.91410650, -2.98298643,
-        -5.36171333, -4.43391379, -4.33668852, -2.47427329, -1.66621903, -3.19088595, -4.21596412,
-        -5.14608368, -3.10665271, -5.37923192, -3.38049474, -3.83873259};
+        -0.550468, -2.132973, -1.549746, -0.650533, -1.051529, -2.248570, -0.141017, -2.028357,
+        -1.947730, -1.511324, -0.166597, -0.379726, -1.965689, -1.172109, -1.475721, -2.700831,
+        -1.537011, -0.658754, -1.596017, -3.353137, -2.266743, -1.084197, -1.076214, -0.406712,
+        -2.743019, -0.425526, -1.079083, -2.139486, -1.270584, -1.024088, -1.154231, -3.201762,
+        -0.888957, -0.532855, -3.103583, -1.221339, -1.355980, -3.531678, -1.438510, -0.975194,
+        -0.080261, -1.162697, -1.568557, -1.398519, -1.322129, -0.470660, -0.370953, -0.907343,
+        -1.179017, -3.312239, -1.286363, -1.586076, -0.345100, -0.824173};

    migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
    auto al  = p.add_literal(migraphx::literal{a_shape, a});
@@ -1070,14 +1068,13 @@ TEST_CASE(logsoftmax_test_axis_2)
        -0.99628491, 1.04314606,  -1.22943315, 0.76930403,  0.31106618};

    std::vector<float> s = {
-        -0.79763715, -3.93655861, -1.82688737, -2.31540676, -4.24447136, -4.04697505, -1.65833256,
-        -3.86938325, -3.60060656, -1.81223672, -2.02392525, -0.71061076, -3.68430560, -4.41879502,
-        -3.32786967, -4.27189027, -3.43178022, -2.36537363, -1.35498658, -4.66852241, -2.05568475,
-        -2.26086988, -3.78095645, -1.71691678, -3.77213434, -1.77835194, -2.24375956, -2.74631770,
-        -1.16906822, -1.91084978, -2.79439671, -3.63882519, -1.31246572, -2.92922971, -4.74219653,
-        -2.83911768, -2.19738500, -3.66473615, -2.55984436, -2.84993327, -0.75189840, -1.82077833,
-        -4.19950523, -3.27170569, -3.17448042, -1.65286841, -0.84481415, -2.36948107, -3.39455924,
-        -4.32467880, -2.28524783, -4.55782704, -2.55908986, -3.01732771};
+        -0.495957, -1.031212, -0.245531, -2.013726, -1.339125, -2.465619, -1.356652, -0.964037,
+        -2.019250, -0.214522, -0.289569, -0.234392, -2.086591, -2.684439, -2.851651, -2.674176,
+        -1.697424, -1.889155, -0.401029, -3.064586, -1.173030, -1.306912, -2.177020, -0.834262,
+        -2.818177, -0.174415, -1.361105, -1.024571, -0.106766, -1.167645, -1.072650, -2.576522,
+        -0.569261, -1.207483, -3.679894, -2.095913, -0.504264, -3.039291, -1.290559, -1.156812,
+        -0.126453, -0.551493, -2.506384, -2.646261, -1.905195, -0.206994, -0.191369, -0.959754,
+        -1.948685, -3.671233, -0.875521, -3.111952, -1.905644, -1.6076011};

    migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
    auto al  = p.add_literal(migraphx::literal{a_shape, a});
@@ -1104,14 +1101,13 @@ TEST_CASE(logsoftmax_test_axis_3)
        -0.99628491, 1.04314606,  -1.22943315, 0.76930403,  0.31106618};

    std::vector<float> s = {
-        -0.33690375, -3.47582521, -1.36615397, -0.27936556, -2.20843016, -2.01093385, -0.22551114,
-        -2.43656183, -2.16778514, -1.57241522, -1.78410375, -0.47078926, -1.06745881, -1.80194823,
-        -0.71102288, -2.30719726, -1.46708721, -0.40068062, -0.42698261, -3.74051844, -1.12768078,
-        -1.07891856, -2.59900513, -0.53496546, -2.56139951, -0.56761711, -1.03302473, -2.09771276,
-        -0.52046328, -1.26224484, -1.76322959, -2.60765807, -0.28129860, -0.81424303, -2.62720985,
-        -0.72413100, -0.65570381, -2.12305496, -1.01816317, -2.48063402, -0.38259915, -1.45147908,
-        -1.84310238, -0.91530284, -0.81807757, -1.31692881, -0.50887455, -2.03354147, -1.48767160,
-        -2.41779116, -0.37836019, -2.56853147, -0.56979429, -1.02803214};
+        -0.336904, -3.475825, -1.366154, -0.279366, -2.208430, -2.010934, -0.225511, -2.436562,
+        -2.167785, -1.572415, -1.784104, -0.470789, -1.067459, -1.801948, -0.711023, -2.307197,
+        -1.467087, -0.400681, -0.426983, -3.740518, -1.127681, -1.078919, -2.599005, -0.534965,
+        -2.561400, -0.567617, -1.033025, -2.097713, -0.520463, -1.262245, -1.763230, -2.607658,
+        -0.281299, -0.814243, -2.627210, -0.724131, -0.655704, -2.123055, -1.018163, -2.480634,
+        -0.382599, -1.451479, -1.843102, -0.915303, -0.818078, -1.316929, -0.508875, -2.033541,
+        -1.487672, -2.417791, -0.378360, -2.568531, -0.569794, -1.028032};

    migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
    auto al  = p.add_literal(migraphx::literal{a_shape, a});
@@ -1124,40 +1120,6 @@ TEST_CASE(logsoftmax_test_axis_3)
    EXPECT(migraphx::verify_range(results_vector, s));
 }

-TEST_CASE(logsoftmax_test_axis_4)
-{
-    migraphx::program p;
-    std::vector<float> a = {
-        1.93885877,  -1.20006269, 0.90960855,  0.42108916,  -1.50797544, -1.31047913, 1.07816336,
-        -1.13288733, -0.86411064, 0.97800238,  0.76631385,  2.07962834,  -0.8940665,  -1.62855592,
-        -0.53763057, -1.48165117, -0.64154112, 0.42486547,  0.89330917,  -2.42022666, 0.192611,
-        -0.01257413, -1.5326607,  0.53137897,  -1.52383859, 0.46994381,  0.00453619,  0.0066996,
-        1.58394908,  0.84216752,  -0.04137941, -0.88580789, 1.44055158,  -0.17621241, -1.98917923,
-        -0.08610038, 0.79020567,  -0.67714548, 0.42774631,  0.1376574,   2.23569227,  1.16681234,
-        -1.21191456, -0.28411502, -0.18688975, 1.67552548,  2.48357974,  0.95891282,  -0.06616535,
-        -0.99628491, 1.04314606,  -1.22943315, 0.76930403,  0.31106618};
-
-    std::vector<float> s = {0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
-                            0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
-                            0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
-                            0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
-                            0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
-                            0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
-                            0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
-                            0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000,
-                            0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000};
-
-    migraphx::shape a_shape{migraphx::shape::float_type, {2, 3, 3, 3}};
-    auto al  = p.add_literal(migraphx::literal{a_shape, a});
-    int axis = 4;
-    p.add_instruction(migraphx::op::logsoftmax{axis}, al);
-    p.compile(migraphx::cpu::target{});
-    auto result = p.eval({});
-    std::vector<float> results_vector;
-    result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
-    EXPECT(migraphx::verify_range(results_vector, s));
-}
-
 TEST_CASE(conv2d_test)
 {
    migraphx::program p;

--- a/test/gpu/miopen.cpp
+++ b/test/gpu/miopen.cpp
@@ -3326,7 +3326,6 @@ template struct test_logsoftmax<0>;
 template struct test_logsoftmax<1>;
 template struct test_logsoftmax<2>;
 template struct test_logsoftmax<3>;
-template struct test_logsoftmax<4>;

 template <int Axis>
 struct test_logsoftmax_1 : verify_program<test_logsoftmax_1<Axis>>
@@ -3343,7 +3342,6 @@ struct test_logsoftmax_1 : verify_program<test_logsoftmax_1<Axis>>
 };

 template struct test_logsoftmax_1<0>;
-template struct test_logsoftmax_1<1>;

 struct test_fp32_fp16_lall : verify_program<test_fp32_fp16_lall>
 {

--- a/test/op_shape_test.cpp
+++ b/test/op_shape_test.cpp
@@ -383,14 +383,6 @@ TEST_CASE(logsoftmax)
    {
        migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
        int axis = 4;
-        expect_shape(migraphx::shape{migraphx::shape::float_type, {2, 3, 4, 5}},
-                     migraphx::op::logsoftmax{axis},
-                     input);
-    }
-
-    {
-        migraphx::shape input{migraphx::shape::float_type, {2, 3, 4, 5}};
-        int axis = 5;
        throws_shape(migraphx::op::logsoftmax{axis}, input);
    }