Updates for RC1 (#1425)

* rearrange default pass list; adjust_allocation must be run after rep… (#1418) * Regenerate driver models (#1422) * Add support in mlir for transposed and broadcasted shaped (#1378) * Add relaxed standard shape assertion (#1416) Co-authored-by: Brian Pickrell <95253842+bpickrel@users.noreply.github.com> Co-authored-by: kahmed10 <15948690+kahmed10@users.noreply.github.com> Co-authored-by: Paul Fultz II <pfultz2@yahoo.com> Co-authored-by: jungpark-mlir <jungwook.park@amd.com>

Updates for RC1 (#1425)
* rearrange default pass list; adjust_allocation must be run after rep… (#1418) * Regenerate driver models (#1422) * Add support in mlir for transposed and broadcasted shaped (#1378) * Add relaxed standard shape assertion (#1416) Co-authored-by: Brian Pickrell <95253842+bpickrel@users.noreply.github.com> Co-authored-by: kahmed10 <15948690+kahmed10@users.noreply.github.com> Co-authored-by: Paul Fultz II <pfultz2@yahoo.com> Co-authored-by: jungpark-mlir <jungwook.park@amd.com>
360b1801 · Chris Austen · GitHub · 83784c52 · 360b1801 · 360b1801
Unverified Commit 360b1801 authored Oct 28, 2022 by Chris Austen Committed by GitHub Oct 28, 2022
14 changed files
--- a/Dockerfile
+++ b/Dockerfile
@@ -86,7 +86,7 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
 ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
-RUN cget -p /usr/local install ROCmSoftwarePlatform/llvm-project-mlir@e8e77eb16be413d301ea8509726d47f265d9011f -DBUILD_MIXR_TARGET=On
+RUN cget -p /usr/local install ROCmSoftwarePlatform/llvm-project-mlir@c0723a7e50043d973cb73ae51dc30d36679ee7e5 -DBUILD_MIXR_TARGET=On
 ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
 ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db

--- a/src/driver/alexnet.cpp
+++ b/src/driver/alexnet.cpp
@@ -25,7 +25,6 @@
 #include <migraphx/make_op.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/generate.hpp>
-#include <migraphx/json.hpp>
 #include "models.hpp"
 namespace migraphx {
 namespace driver {
@@ -40,161 +39,153 @@ migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size)
        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 1)));
    auto x_main_module_2       = mmain->add_literal(migraphx::abs(
        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 2)));
-    auto x_data_0              = mmain->add_parameter(
+    auto x_0                   = mmain->add_parameter(
-        "data_0", migraphx::shape{migraphx::shape::float_type, {batch, 3, 224, 224}});
+        "0", migraphx::shape{migraphx::shape::float_type, {batch, 3, 224, 224}});
    auto x_main_module_4 = mmain->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000, 4096}}, 3));
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000}}, 3));
    auto x_main_module_5 = mmain->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000}}, 4));
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000, 4096}}, 4));
    auto x_main_module_6 = mmain->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 4096}}, 5));
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 5));
-    auto x_main_module_7 = mmain->add_literal(migraphx::abs(
+    auto x_main_module_7 = mmain->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 6)));
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 4096}}, 6));
    auto x_main_module_8 = mmain->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 9216}}, 7));
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 7));
    auto x_main_module_9 = mmain->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 8));
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 9216}}, 8));
-    auto x_main_module_10 = mmain->add_literal(migraphx::generate_literal(
+    auto x_main_module_10 = mmain->add_literal(
-        migraphx::shape{migraphx::shape::float_type, {256, 192, 3, 3}}, 9));
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 9));
-    auto x_main_module_11 = mmain->add_literal(
+    auto x_main_module_11 = mmain->add_literal(migraphx::generate_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 10));
+        migraphx::shape{migraphx::shape::float_type, {256, 256, 3, 3}}, 10));
-    auto x_main_module_12 = mmain->add_literal(migraphx::generate_literal(
+    auto x_main_module_12 = mmain->add_literal(
-        migraphx::shape{migraphx::shape::float_type, {384, 192, 3, 3}}, 11));
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 11));
-    auto x_main_module_13 = mmain->add_literal(
+    auto x_main_module_13 = mmain->add_literal(migraphx::generate_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {384}}, 12));
+        migraphx::shape{migraphx::shape::float_type, {256, 384, 3, 3}}, 12));
-    auto x_main_module_14 = mmain->add_literal(migraphx::generate_literal(
+    auto x_main_module_14 = mmain->add_literal(
-        migraphx::shape{migraphx::shape::float_type, {384, 256, 3, 3}}, 13));
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {384}}, 13));
-    auto x_main_module_15 = mmain->add_literal(
+    auto x_main_module_15 = mmain->add_literal(migraphx::generate_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {384}}, 14));
+        migraphx::shape{migraphx::shape::float_type, {384, 192, 3, 3}}, 14));
-    auto x_main_module_16 = mmain->add_literal(migraphx::generate_literal(
+    auto x_main_module_16 = mmain->add_literal(
-        migraphx::shape{migraphx::shape::float_type, {256, 48, 5, 5}}, 15));
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {192}}, 15));
-    auto x_main_module_17 = mmain->add_literal(migraphx::abs(
+    auto x_main_module_17 = mmain->add_literal(migraphx::generate_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 16)));
+        migraphx::shape{migraphx::shape::float_type, {192, 64, 5, 5}}, 16));
-    auto x_main_module_18 = mmain->add_literal(migraphx::generate_literal(
+    auto x_main_module_18 = mmain->add_literal(
-        migraphx::shape{migraphx::shape::float_type, {96, 3, 11, 11}}, 17));
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {64}}, 17));
-    auto x_main_module_19 = mmain->add_literal(
+    auto x_main_module_19 = mmain->add_literal(migraphx::generate_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {96}}, 18));
+        migraphx::shape{migraphx::shape::float_type, {64, 3, 11, 11}}, 18));
    auto x_main_module_20 = mmain->add_instruction(
-        migraphx::make_json_op("convolution",
+        migraphx::make_json_op(
-                               "{dilation:[1,1],group:1,padding:[0,0,0,0],padding_mode:0,stride:[4,"
+            "convolution",
-                               "4],use_dynamic_same_auto_pad:0}"),
+            "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[4,4]}"),
-        x_data_0,
+        x_0,
-        x_main_module_18);
+        x_main_module_19);
    auto x_main_module_21 = mmain->add_instruction(
-        migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,96,54,54]}"), x_main_module_19);
+        migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,64,55,55]}"), x_main_module_18);
    auto x_main_module_22 =
        mmain->add_instruction(migraphx::make_op("add"), x_main_module_20, x_main_module_21);
    auto x_main_module_23 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_22);
    auto x_main_module_24 = mmain->add_instruction(
-        migraphx::make_json_op("lrn", "{alpha:9.999999747378752e-05,beta:0.75,bias:1.0,size:5}"),
+        migraphx::make_json_op(
+            "pooling",
+            "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}"),
        x_main_module_23);
    auto x_main_module_25 = mmain->add_instruction(
+        migraphx::make_json_op(
+            "convolution",
+            "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[1,1]}"),
+        x_main_module_24,
+        x_main_module_17);
+    auto x_main_module_26 = mmain->add_instruction(
+        migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,192,27,27]}"), x_main_module_16);
+    auto x_main_module_27 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_25, x_main_module_26);
+    auto x_main_module_28 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_27);
+    auto x_main_module_29 = mmain->add_instruction(
        migraphx::make_json_op(
            "pooling",
            "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}"),
-        x_main_module_24);
+        x_main_module_28);
-    auto x_main_module_26 = mmain->add_instruction(
-        migraphx::make_json_op("convolution",
-                               "{dilation:[1,1],group:2,padding:[2,2,2,2],padding_mode:0,stride:[1,"
-                               "1],use_dynamic_same_auto_pad:0}"),
-        x_main_module_25,
-        x_main_module_16);
-    auto x_main_module_27 = mmain->add_instruction(
-        migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,256,26,26]}"), x_main_module_17);
-    auto x_main_module_28 =
-        mmain->add_instruction(migraphx::make_op("add"), x_main_module_26, x_main_module_27);
-    auto x_main_module_29 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_28);
    auto x_main_module_30 = mmain->add_instruction(
-        migraphx::make_json_op("lrn", "{alpha:9.999999747378752e-05,beta:0.75,bias:1.0,size:5}"),
+        migraphx::make_json_op(
-        x_main_module_29);
+            "convolution",
+            "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}"),
+        x_main_module_29,
+        x_main_module_15);
    auto x_main_module_31 = mmain->add_instruction(
+        migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,384,13,13]}"), x_main_module_14);
+    auto x_main_module_32 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_30, x_main_module_31);
+    auto x_main_module_33 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_32);
+    auto x_main_module_34 = mmain->add_instruction(
        migraphx::make_json_op(
-            "pooling",
+            "convolution",
-            "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}"),
+            "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}"),
-        x_main_module_30);
+        x_main_module_33,
-    auto x_main_module_32 = mmain->add_instruction(
+        x_main_module_13);
-        migraphx::make_json_op("convolution",
+    auto x_main_module_35 = mmain->add_instruction(
-                               "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,"
+        migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,256,13,13]}"), x_main_module_12);
-                               "1],use_dynamic_same_auto_pad:0}"),
+    auto x_main_module_36 =
-        x_main_module_31,
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_34, x_main_module_35);
-        x_main_module_14);
+    auto x_main_module_37 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_36);
-    auto x_main_module_33 = mmain->add_instruction(
+    auto x_main_module_38 = mmain->add_instruction(
-        migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,384,12,12]}"), x_main_module_15);
+        migraphx::make_json_op(
-    auto x_main_module_34 =
+            "convolution",
-        mmain->add_instruction(migraphx::make_op("add"), x_main_module_32, x_main_module_33);
+            "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}"),
-    auto x_main_module_35 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_34);
+        x_main_module_37,
-    auto x_main_module_36 = mmain->add_instruction(
+        x_main_module_11);
-        migraphx::make_json_op("convolution",
+    auto x_main_module_39 = mmain->add_instruction(
-                               "{dilation:[1,1],group:2,padding:[1,1,1,1],padding_mode:0,stride:[1,"
+        migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,256,13,13]}"), x_main_module_10);
-                               "1],use_dynamic_same_auto_pad:0}"),
+    auto x_main_module_40 =
-        x_main_module_35,
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_38, x_main_module_39);
-        x_main_module_12);
+    auto x_main_module_41 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_40);
-    auto x_main_module_37 = mmain->add_instruction(
+    auto x_main_module_42 = mmain->add_instruction(
-        migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,384,12,12]}"), x_main_module_13);
-    auto x_main_module_38 =
-        mmain->add_instruction(migraphx::make_op("add"), x_main_module_36, x_main_module_37);
-    auto x_main_module_39 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_38);
-    auto x_main_module_40 = mmain->add_instruction(
-        migraphx::make_json_op("convolution",
-                               "{dilation:[1,1],group:2,padding:[1,1,1,1],padding_mode:0,stride:[1,"
-                               "1],use_dynamic_same_auto_pad:0}"),
-        x_main_module_39,
-        x_main_module_10);
-    auto x_main_module_41 = mmain->add_instruction(
-        migraphx::make_json_op("broadcast", "{axis:1,out_lens:[1,256,12,12]}"), x_main_module_11);
-    auto x_main_module_42 =
-        mmain->add_instruction(migraphx::make_op("add"), x_main_module_40, x_main_module_41);
-    auto x_main_module_43 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_42);
-    auto x_main_module_44 = mmain->add_instruction(
        migraphx::make_json_op(
            "pooling",
-            "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,1,1],stride:[2,2]}"),
+            "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}"),
-        x_main_module_43);
+        x_main_module_41);
+    auto x_main_module_43 =
+        mmain->add_instruction(migraphx::make_json_op("flatten", "{axis:1}"), x_main_module_42);
+    auto x_main_module_44 = mmain->add_instruction(migraphx::make_op("identity"), x_main_module_43);
    auto x_main_module_45 = mmain->add_instruction(
-        migraphx::make_json_op("reshape", "{dims:[1,9216]}"), x_main_module_44);
+        migraphx::make_json_op("transpose", "{permutation:[1,0]}"), x_main_module_9);
-    auto x_main_module_46 = mmain->add_instruction(
+    auto x_main_module_46 =
-        migraphx::make_json_op("transpose", "{permutation:[1,0]}"), x_main_module_8);
+        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_44, x_main_module_45);
-    auto x_main_module_47 =
+    auto x_main_module_47 = mmain->add_instruction(
-        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_45, x_main_module_46);
+        migraphx::make_json_op("multibroadcast", "{out_lens:[1,4096]}"), x_main_module_8);
    auto x_main_module_48 = mmain->add_instruction(
-        migraphx::make_json_op("multibroadcast", "{out_lens:[1,4096]}"), x_main_module_9);
-    auto x_main_module_49 = mmain->add_instruction(
        migraphx::make_json_op("multibroadcast", "{out_lens:[1,4096]}"), x_main_module_2);
+    auto x_main_module_49 =
+        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_47, x_main_module_48);
    auto x_main_module_50 =
-        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_48, x_main_module_49);
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_46, x_main_module_49);
-    auto x_main_module_51 =
+    auto x_main_module_51 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_50);
-        mmain->add_instruction(migraphx::make_op("add"), x_main_module_47, x_main_module_50);
+    auto x_main_module_52 = mmain->add_instruction(migraphx::make_op("identity"), x_main_module_51);
-    auto x_main_module_52 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_51);
+    auto x_main_module_53 = mmain->add_instruction(
-    auto x_main_module_53 = mmain->add_instruction(migraphx::make_op("identity"), x_main_module_52);
+        migraphx::make_json_op("transpose", "{permutation:[1,0]}"), x_main_module_7);
-    auto x_main_module_54 = mmain->add_instruction(
+    auto x_main_module_54 =
-        migraphx::make_json_op("transpose", "{permutation:[1,0]}"), x_main_module_6);
+        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_52, x_main_module_53);
-    auto x_main_module_55 =
+    auto x_main_module_55 = mmain->add_instruction(
-        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_53, x_main_module_54);
+        migraphx::make_json_op("multibroadcast", "{out_lens:[1,4096]}"), x_main_module_6);
    auto x_main_module_56 = mmain->add_instruction(
-        migraphx::make_json_op("multibroadcast", "{out_lens:[1,4096]}"), x_main_module_7);
-    auto x_main_module_57 = mmain->add_instruction(
        migraphx::make_json_op("multibroadcast", "{out_lens:[1,4096]}"), x_main_module_1);
+    auto x_main_module_57 =
+        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_55, x_main_module_56);
    auto x_main_module_58 =
-        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_56, x_main_module_57);
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_54, x_main_module_57);
-    auto x_main_module_59 =
+    auto x_main_module_59 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_58);
-        mmain->add_instruction(migraphx::make_op("add"), x_main_module_55, x_main_module_58);
+    auto x_main_module_60 = mmain->add_instruction(
-    auto x_main_module_60 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_59);
+        migraphx::make_json_op("transpose", "{permutation:[1,0]}"), x_main_module_5);
-    auto x_main_module_61 = mmain->add_instruction(migraphx::make_op("identity"), x_main_module_60);
+    auto x_main_module_61 =
+        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_59, x_main_module_60);
    auto x_main_module_62 = mmain->add_instruction(
-        migraphx::make_json_op("transpose", "{permutation:[1,0]}"), x_main_module_4);
+        migraphx::make_json_op("multibroadcast", "{out_lens:[1,1000]}"), x_main_module_4);
-    auto x_main_module_63 =
+    auto x_main_module_63 = mmain->add_instruction(
-        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_61, x_main_module_62);
-    auto x_main_module_64 = mmain->add_instruction(
-        migraphx::make_json_op("multibroadcast", "{out_lens:[1,1000]}"), x_main_module_5);
-    auto x_main_module_65 = mmain->add_instruction(
        migraphx::make_json_op("multibroadcast", "{out_lens:[1,1000]}"), x_main_module_0);
-    auto x_main_module_66 =
+    auto x_main_module_64 =
-        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_64, x_main_module_65);
+        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_62, x_main_module_63);
-    auto x_main_module_67 =
+    auto x_main_module_65 =
-        mmain->add_instruction(migraphx::make_op("add"), x_main_module_63, x_main_module_66);
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_61, x_main_module_64);
-    auto x_main_module_68 =
+    mmain->add_return({x_main_module_65});
-        mmain->add_instruction(migraphx::make_json_op("softmax", "{axis:1}"), x_main_module_67);
-    mmain->add_return({x_main_module_68});
    return p;
 }

--- a/src/driver/inceptionv3.cpp
+++ b/src/driver/inceptionv3.cpp
--- a/src/driver/resnet50.cpp
+++ b/src/driver/resnet50.cpp
--- a/src/driver/verify.cpp
+++ b/src/driver/verify.cpp
@@ -145,7 +145,7 @@ void verify_reduced(program p,
    auto* mm  = p.get_main_module();
    auto last = std::prev(mm->end(), n + 1);
    mm->remove_instructions(last, mm->end());
-    std::cout << "Verify: " << std::endl;
+    std::cout << "Verify: " << n << std::endl;
    std::cout << p << std::endl;
    verify_program(std::to_string(n), p, t, options, quantize, inputs, tolerance);
 }
@@ -159,6 +159,7 @@ void verify_reduced_program(const program& p,
 {
    const auto* mm = p.get_main_module();
    auto n         = std::distance(mm->begin(), mm->end());
+    std::cout << "Verify steps: " << n << std::endl;
    for(std::size_t i = 0; i < n; i++)
    {
        verify_reduced(p, i, t, options, quantize, inputs, tolerance);

--- a/src/targets/gpu/fuse_mlir.cpp
+++ b/src/targets/gpu/fuse_mlir.cpp
@@ -49,7 +49,7 @@ struct mlir_conv
    std::string name() const { return "gpu::mlir_conv"; }
    shape compute_shape(std::vector<shape> inputs, const std::vector<module_ref>& mods) const
    {
-        check_shapes{inputs, *this}.standard();
+        check_shapes{inputs, *this}.packed_or_broadcasted();
        if(mods.size() != 1)
            MIGRAPHX_THROW("should have one submodule.");
        if(inputs.size() < 2)
@@ -70,6 +70,9 @@ MIGRAPHX_PRED_MATCHER(is_mlir_conv, instruction_ref ins)
    auto group = v.at("group").to<int>();
    if(group != 1)
        return false;
+    // Avoid MLIR assertion: Index < Length && "Invalid index!"
+    if(ins->get_shape().lens().size() != 4)
+        return false;
    return true;
 }
@@ -96,9 +99,10 @@ struct find_conv_pointwise
                                   i.name());
           }))
            return;
-        // Only fuse with fp32 for now
+        // Only fuse with fp32/fp16
        if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [&](auto i) {
-               return i->get_shape().type() != shape::type_t::float_type;
+               return not contains({shape::type_t::float_type, shape::type_t::half_type},
+                                   i->get_shape().type());
           }))
            return;
        std::sort(names.begin(), names.end());

--- a/src/targets/gpu/include/migraphx/gpu/mlir.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/mlir.hpp
@@ -36,7 +36,8 @@ struct module;
 namespace gpu {
 std::string dump_mlir(const module& m);
-code_object_op compile_mlir(const context& ctx, const module& m);
+code_object_op
+compile_mlir(const context& ctx, module m, const std::vector<instruction_ref>& inputs);
 instruction_ref insert_mlir(module& m,
                            instruction_ref ins,

--- a/src/targets/gpu/include/migraphx/gpu/perfdb.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/perfdb.hpp
@@ -41,7 +41,7 @@ struct problem_params
    shape output;
 };
-std::string get_mlir_perf_for_conv(const problem_params& pp);
+std::string get_mlir_perf_for_conv(const problem_params& pp, bool xdlops);
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS

--- a/src/targets/gpu/jit/mlir.cpp
+++ b/src/targets/gpu/jit/mlir.cpp
@@ -41,7 +41,7 @@ struct mlir_compiler : compiler<mlir_compiler>
    {
        auto* smod = ins->module_inputs().front();
        assert(smod->get_parameter_names().size() == ins->inputs().size() - 1);
-        return insert(compile_mlir(ctx, *smod));
+        return insert(compile_mlir(ctx, *smod, ins->inputs()));
    }
    compiler_replace insert(code_object_op co) const

--- a/src/targets/gpu/mlir.cpp
+++ b/src/targets/gpu/mlir.cpp
@@ -21,6 +21,7 @@
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */
+#include "migraphx/make_op.hpp"
 #include <migraphx/gpu/mlir.hpp>
 #ifdef MIGRAPHX_MLIR
@@ -43,8 +44,9 @@
 #include <migraphx/gpu/code_object_op.hpp>
 #include <migraphx/gpu/context.hpp>
 #include <migraphx/gpu/device_name.hpp>
-#include <migraphx/iterator_for.hpp>
 #include <migraphx/gpu/perfdb.hpp>
+#include <migraphx/iterator_for.hpp>
+#include <migraphx/permutation.hpp>
 #include <deque>
 #include <variant>
@@ -370,7 +372,11 @@ struct mlir_program
        mlir_operation_state& add_results(const std::vector<shape>& outputs)
        {
-            auto x = prog->make_tensors(outputs);
+            std::vector<shape> reshaped(outputs.size());
+            std::transform(outputs.begin(), outputs.end(), reshaped.begin(), [](const shape& r) {
+                return shape{r.type(), r.lens()};
+            });
+            auto x = prog->make_tensors(reshaped);
            mlirOperationStateAddResults(&op_state, x.size(), x.data());
            return *this;
        }
@@ -502,11 +508,12 @@ struct mlir_program
            {
                pp =
                    problem_params{ins->get_operator(), to_shapes(ins->inputs()), ins->get_shape()};
-                std::string tuned = get_tune_params();
+                // check if HW supports xdlops
+                bool xdlops       = contains(get_xdlops_archs(), target_name);
+                std::string tuned = get_tune_params(xdlops);
                if(not tuned.empty())
                    ops.add_attributes({{"perf_config", tuned}});
-                // check if HW supports xdlops
+                if(xdlops)
-                if(contains(get_xdlops_archs(), target_name))
                    ops.add_attributes({{"xdlopsV2", true}});
            }
@@ -571,7 +578,7 @@ struct mlir_program
        MIGRAPHX_THROW("Failed to compile mlir program");
    }
-    std::string get_tune_params() { return get_mlir_perf_for_conv(pp); }
+    std::string get_tune_params(bool xdlops) { return get_mlir_perf_for_conv(pp, xdlops); }
    mlir_context ctx;
    MlirLocation location;
@@ -589,8 +596,54 @@ std::string dump_mlir(const module& m)
    return mlir_print(&mlirOperationPrint, mod_op);
 }
-code_object_op compile_mlir(const context&, const module& m)
+void adjust_param_shapes(module& m, const std::vector<instruction_ref>& inputs) // re-declares params of m whose input shape is non-standard
+{
+    auto names = m.get_parameter_names();
+    std::sort(names.begin(), names.end()); // the i-th sorted name is paired with inputs[i] below
+    for(auto i : range(names.size()))
+    {
+        const auto& name  = names[i];
+        const auto& input = inputs[i]->get_shape(); // NOTE(review): assumes inputs follow sorted-name order - confirm with callers
+        auto param        = m.get_parameter(name);
+        if(input.standard())
+            continue; // standard-layout inputs keep their original parameter
+        auto lens    = input.lens();
+        auto strides = input.strides();
+        std::vector<operation> ops; // ops applied, in order, on top of the new parameter
+        if(input.transposed())
+        {
+            auto perm  = find_permutation(input);
+            auto iperm = invert_permutation(perm);
+            lens       = reorder_dims(lens, iperm); // new parameter is declared with lens reordered by the inverse permutation
+            strides    = reorder_dims(strides, iperm);
+            ops.push_back(make_op("transpose", {{"permutation", perm}})); // presumably restores the input's dimension order - confirm
+        }
+        if(input.broadcasted())
+        {
+            std::transform(lens.begin(),
+                           lens.end(),
+                           strides.begin(),
+                           lens.begin(),
+                           [](auto len, auto stride) -> std::size_t {
+                               if(stride == 0)
+                                   return 1; // zero-stride dimension is declared with length 1
+                               return len;
+                           });
+            ops.push_back(make_op("multibroadcast", {{"out_lens", input.lens()}})); // expand back to the input's full lens
+        }
+        auto new_param =
+            std::accumulate(ops.begin(),
+                            ops.end(),
+                            m.add_parameter(name + ".0", shape{input.type(), lens}), // replacement parameter: "<name>.0", type + adjusted lens only
+                            [&](auto x, auto op) { return m.insert_instruction(param, op, x); }); // each op is inserted at param and fed the previous result
+        m.replace_instruction(param, new_param); // swap the old parameter for the end of the new chain
+        m.remove_instruction(param);
+    }
+}
+code_object_op compile_mlir(const context&, module m, const std::vector<instruction_ref>& inputs)
 {
+    adjust_param_shapes(m, inputs);
    const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
    if(trace)
        std::cout << m << std::endl;
@@ -662,13 +715,19 @@ instruction_ref insert_mlir(module& m,
 std::string dump_mlir(const module&) { return {}; }
-code_object_op compile_mlir(const context&, const module&) { return {}; }
 template <class T>
 void use(T&)
 {
 }
+// Disabling clang-tidy warning on non-real usage.
+// NOLINTBEGIN(performance-unnecessary-value-param)
+code_object_op compile_mlir(const context&, module, const std::vector<instruction_ref>&)
+{
+    return {}; // stub: all arguments are unnamed and ignored; returns an empty-brace-initialized code_object_op
+}
+// NOLINTEND(performance-unnecessary-value-param)
 instruction_ref
 // cppcheck-suppress funcArgNamesDifferent
 insert_mlir(module& m, instruction_ref, code_object_op co, const std::vector<instruction_ref>&)

--- a/src/targets/gpu/perfdb.cpp
+++ b/src/targets/gpu/perfdb.cpp
@@ -108,16 +108,17 @@ auto query_miopen_db(const std::string& query)
 } // namespace
-std::string get_mlir_perf_for_conv(const problem_params& pp)
+std::string get_mlir_perf_for_conv(const problem_params& pp, bool xdlops)
 {
+    std::string solver = xdlops ? "ConvMlirIgemmFwdXdlops" : "ConvMlirIgemmFwd";
    std::string query  = "select P.* \
                             from perf_db P, config C \
                             where P.config = C.id AND \
-                             P.solver = 'ConvMlirIgemmFwdXdlops' AND \
+                             P.solver = '${solver}' AND \
                             ${config}";
-    auto results =
+    auto results = query_miopen_db(
-        query_miopen_db(interpolate_string(query, {{"config", generate_miopen_config(pp)}}));
+        interpolate_string(query, {{"config", generate_miopen_config(pp)}, {"solver", solver}}));
    if(results.empty())
        return "";
    return results.front().at("params");

--- a/src/targets/gpu/target.cpp
+++ b/src/targets/gpu/target.cpp
@@ -138,12 +138,12 @@ std::vector<pass> target::get_passes(migraphx::context& gctx, const compile_opti
        dead_code_elimination{},
        pack_int8_args{},
        dead_code_elimination{},
-        adjust_allocation{gpu_allocation_model{}},
-        dead_code_elimination{},
        fuse_ops{&ctx, options.fast_math},
        dead_code_elimination{},
        replace_allocate{gpu_allocation_model{}, options.offload_copy},
        dead_code_elimination{},
+        adjust_allocation{gpu_allocation_model{}},
+        dead_code_elimination{},
        compile_ops{&ctx},
        dead_code_elimination{},
        write_literals{&ctx},

--- a/test/gpu/mlir.cpp
+++ b/test/gpu/mlir.cpp
@@ -84,7 +84,7 @@ migraphx::program create_program_from_mlir(const migraphx::module& mmlir)
    inputs.push_back(mm->add_parameter("output", mmlir.get_output_shapes().front()));
    migraphx::gpu::context ctx;
-    migraphx::gpu::insert_mlir(*mm, mm->end(), compile_mlir(ctx, mmlir), inputs);
+    migraphx::gpu::insert_mlir(*mm, mm->end(), compile_mlir(ctx, mmlir, inputs), inputs);
    return p;
 }

--- a/test/verify/test_shape_alloc.cpp
+++ b/test/verify/test_shape_alloc.cpp
+/*
+ * The MIT License (MIT)
+ *
+ * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#include "verify_program.hpp"
+#include <migraphx/program.hpp>
+#include <migraphx/generate.hpp>
+#include <migraphx/make_op.hpp>
+#include <migraphx/op/reduce_mean.hpp>
+/**
+ * @brief test_shape_alloc sets up a situation that could lead to an exception "convolution: Shapes
+ * are not in standard layout" if a "replace_allocate" compiler pass is not followed by
+ * "adjust_allocation".  The last transpose instruction generates a shape with a stride of 1 in
+ * the 2nd index, a non-standard layout that should be reallocated by adjust_allocation.
+ */
+struct test_shape_alloc : verify_program<test_shape_alloc>
+{
+    migraphx::program create_program() const
+    {
+        migraphx::program p;
+        auto* mm = p.get_main_module();
+        auto weights = mm->add_literal(migraphx::generate_literal(
+            migraphx::shape{migraphx::shape::float_type, {11, 8, 1, 1}, {8, 1, 1, 1}}));
+        auto x = mm->add_parameter("x", migraphx::shape{migraphx::shape::float_type, {1, 8, 7, 7}});
+        auto transpose1 =
+            mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 2, 3, 1}}}),
+                                x); //  -> float_type, {1, 7, 7, 8}, {392, 7, 1, 49}
+        auto reduce_ins =
+            mm->add_instruction(migraphx::make_op("reduce_mean", {{"axes", {1, 2}}}),
+                                transpose1); //  -> float_type, {1, 1, 1, 8}, {8, 8, 8, 1}
+        auto transpose2 =
+            mm->add_instruction(migraphx::make_op("transpose", {{"permutation", {0, 3, 1, 2}}}),
+                                reduce_ins); //  -> float_type, {1, 8, 1, 1}, {8, 1, 8, 8}
+        auto conv_op = migraphx::make_op("convolution"); // no attributes are passed explicitly
+        mm->add_instruction(conv_op, transpose2, weights); // convolution takes transpose2's output directly, with no op in between
+        return p;
+    }
+};