Merge branch 'develop' into optimize

913ae362 · Chris Austen · GitHub · f1e16656 · b8c8d09b · 913ae362
Unverified Commit 913ae362 authored Dec 13, 2022 by Chris Austen Committed by GitHub Dec 13, 2022
20 changed files
--- a/src/targets/ref/lowering.cpp
+++ b/src/targets/ref/lowering.cpp
@@ -383,9 +383,9 @@ struct ref_gemm
    std::string name() const { return "ref::dot"; }
    shape compute_shape(const std::vector<shape>& inputs) const { return op.compute_shape(inputs); }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    argument compute(context&, const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
        migemm(result, args[0], args[1], 1.0f, 0.0f);
        return result;
@@ -449,10 +449,10 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
    {
        return op.normalize_compute_shape(inputs);
    }
-    argument compute(context&, const shape& output_shape, std::vector<argument> args) const
+    argument compute(context&, const dyn_output& dyn_out, std::vector<argument> args) const
    {
-        argument result{output_shape};
+        argument result{dyn_out.computed_shape};
-        auto batch_lens        = output_shape.lens();
+        auto batch_lens        = dyn_out.computed_shape.lens();
        int64_t tuned_axis     = tune_axis(args[0].get_shape().lens().size(), op.axis, op.name());
        std::size_t n_dims     = batch_lens[tuned_axis];
        batch_lens[tuned_axis] = 1;
@@ -475,7 +475,7 @@ struct ref_softmax : auto_register_op<ref_softmax<Op>>
                for(std::size_t j = 0; j < n_dims; ++j)
                {
                    idx[tuned_axis]   = j;
-                    std::size_t index = output_shape.index(idx);
+                    std::size_t index = dyn_out.computed_shape.index(idx);
                    output[index]     = std::exp(input[index] - batch_max[i]);
                }

--- a/test/gpu/hip.cpp
+++ b/test/gpu/hip.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include <test.hpp>
+#include <migraphx/argument.hpp>
+#include <migraphx/gpu/hip.hpp>
+#include <migraphx/gpu/target.hpp>
+TEST_CASE(tuple_to_from_gpu)
+{
+    migraphx::shape s1{migraphx::shape::float_type, {2, 3}};
+    migraphx::shape s2{migraphx::shape::int32_type, {2, 4}};
+    std::vector<float> p1_data = {1.1, 2.2, 3.3, 4.4, 5.5, 6.6};
+    std::vector<int> p2_data   = {1, 2, 3, 4, 5, 6, 7, 8};
+    auto p1                    = migraphx::argument{s1, p1_data.data()};
+    auto p2                    = migraphx::argument{s2, p2_data.data()};
+    auto p1_gpu                = migraphx::gpu::to_gpu(p1);
+    auto p2_gpu                = migraphx::gpu::to_gpu(p2);
+    auto p_tuple               = migraphx::gpu::from_gpu(migraphx::argument({p1_gpu, p2_gpu}));
+    std::vector<migraphx::argument> results = p_tuple.get_sub_objects();
+    std::vector<float> result1;
+    results[0].visit([&](auto output) { result1.assign(output.begin(), output.end()); });
+    std::vector<int> result2;
+    results[1].visit([&](auto output) { result2.assign(output.begin(), output.end()); });
+    EXPECT(result1 == p1_data);
+    EXPECT(result2 == p2_data);
+}
+int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/gpu/mlir.cpp
+++ b/test/gpu/mlir.cpp
@@ -140,7 +140,7 @@ TEST_CASE(conv)
 {
    const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+  func.func @main(%arg0: tensor<2x8x3x3xf32>, %arg1: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
    %0 = migraphx.convolution(%arg1, %arg0) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
    return %0 : tensor<1x2x2x2xf32>
  }
@@ -163,7 +163,7 @@ TEST_CASE(conv_add_relu)
 {
    const std::string mlir_output = R"__migraphx__(
 module {
-  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {kernel = "mixr"} {
+  func.func @main(%arg0: tensor<1x2x2x2xf32>, %arg1: tensor<2x8x3x3xf32>, %arg2: tensor<1x8x4x4xf32>) -> tensor<1x2x2x2xf32> attributes {arch = "", kernel = "mixr"} {
    %0 = migraphx.convolution(%arg2, %arg1) {dilation = [1, 1], group = 1 : i64, padding = [0, 0, 0, 0], padding_mode = 0 : i64, stride = [1, 1]} : (tensor<1x8x4x4xf32>, tensor<2x8x3x3xf32>) -> tensor<1x2x2x2xf32>
    %1 = migraphx.add(%0, %arg0) : (tensor<1x2x2x2xf32>, tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>
    %2 = migraphx.relu(%1) : (tensor<1x2x2x2xf32>) -> tensor<1x2x2x2xf32>

--- a/src/include/migraphx/int_divide.hpp
+++ b/src/include/migraphx/int_divide.hpp
@@ -21,28 +21,31 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
-#ifndef MIGRAPHX_GUARD_RTGLIB_INT_DIVIDE_HPP
-#define MIGRAPHX_GUARD_RTGLIB_INT_DIVIDE_HPP
-#include <migraphx/config.hpp>
+#include <migraphx/instruction.hpp>
-#include <cmath>
+#include <migraphx/program.hpp>
+#include <migraphx/make_op.hpp>
+#include "test.hpp"
-namespace migraphx {
+TEST_CASE(check_undefined)
-inline namespace MIGRAPHX_INLINE_NS {
-template <class R, class T, class U>
-R floor_divide(T x, U y)
 {
-    return R(std::floor(double(x) / double(y)));
+    migraphx::module m;
-}
+    auto und = m.add_instruction(migraphx::make_op("undefined"));
+    auto cov = m.add_instruction(
+        migraphx::make_op("convert", {{"target_type", migraphx::shape::half_type}}), und);
+    auto abs = m.add_instruction(migraphx::make_op("abs"), cov);
-template <class R, class T, class U>
+    migraphx::shape xs{migraphx::shape::float_type, {2, 3}};
-R ceil_divide(T x, U y)
+    std::vector<float> datax = {1, 2, 3, 4, 5, 6};
-{
-    return R(std::ceil(double(x) / double(y)));
+    auto lit = m.add_literal(migraphx::literal(xs, datax));
-}
+    auto mul = m.add_instruction(migraphx::make_op("mul"), lit, lit);
-} // namespace MIGRAPHX_INLINE_NS
+    EXPECT(und->is_undefined());
-} // namespace migraphx
+    EXPECT(cov->is_undefined());
+    EXPECT(abs->is_undefined());
+    EXPECT(not lit->is_undefined());
+    EXPECT(not mul->is_undefined());
+}
-#endif
+int main(int argc, const char* argv[]) { test::run(argc, argv); }
--- a/test/literal_test.cpp
+++ b/test/literal_test.cpp
@@ -49,6 +49,25 @@ TEST_CASE(literal_test)
    EXPECT(l4.empty());
 }
+TEST_CASE(literal_nstd_shape_vector)
+{
+    migraphx::shape nstd_shape{migraphx::shape::float_type, {1, 3, 2, 2}, {12, 1, 6, 3}};
+    std::vector<float> data(12);
+    std::iota(data.begin(), data.end(), 0);
+    auto l0 = migraphx::literal{nstd_shape, data};
+    // check data buffer is read in correctly
+    std::vector<float> expected_buffer = {0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11};
+    const auto* start                  = reinterpret_cast<const float*>(l0.data());
+    std::vector<float> l0_data{start, start + 12};
+    EXPECT(l0_data == expected_buffer);
+    // check that using visit() (that uses a tensor view) gives data in correct order
+    std::vector<float> results_vector(12);
+    l0.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
+    EXPECT(results_vector == data);
+}
 TEST_CASE(literal_os1)
 {
    migraphx::literal l{1};

--- a/test/onnx/argmax_dyn_test.onnx
+++ b/test/onnx/argmax_dyn_test.onnx
--- a/test/onnx/averagepool_dyn_asym_padding_error_test.onnx
+++ b/test/onnx/averagepool_dyn_asym_padding_error_test.onnx
--- a/test/onnx/averagepool_dyn_autopad_error_test.onnx
+++ b/test/onnx/averagepool_dyn_autopad_error_test.onnx
--- a/test/onnx/averagepool_dyn_cip_error_test.onnx
+++ b/test/onnx/averagepool_dyn_cip_error_test.onnx
--- a/test/onnx/averagepool_dyn_test.onnx
+++ b/test/onnx/averagepool_dyn_test.onnx
--- a/test/onnx/external_constant_test.onnx
+++ b/test/onnx/external_constant_test.onnx
+external_constant_test:¡
+v0"Constant*g
+value*[Bconst_tensorj)
+locationexternal_constant_test.weightj
+offset48j
+length24p external_constant_testb
+0
+B
\ No newline at end of file
--- a/test/onnx/external_constant_test.weight
+++ b/test/onnx/external_constant_test.weight
--- a/test/onnx/flatten_dyn_test.onnx
+++ b/test/onnx/flatten_dyn_test.onnx
--- a/test/onnx/gen_onnx.py
+++ b/test/onnx/gen_onnx.py
--- a/test/onnx/globalavgpool_dyn_test.onnx
+++ b/test/onnx/globalavgpool_dyn_test.onnx
--- a/test/onnx/globallppool_dyn_test.onnx
+++ b/test/onnx/globallppool_dyn_test.onnx
--- a/test/onnx/globalmaxpool_dyn_test.onnx
+++ b/test/onnx/globalmaxpool_dyn_test.onnx
--- a/test/onnx/onnx_test.cpp
+++ b/test/onnx/onnx_test.cpp
--- a/test/onnx/softmax_dyn_test.onnx
+++ b/test/onnx/softmax_dyn_test.onnx
--- a/test/onnx/squeeze_unsqueeze_dyn_test.onnx
+++ b/test/onnx/squeeze_unsqueeze_dyn_test.onnx