Merge branch 'pointwise-nhwc' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into nhwc_workaround

Merge branch 'pointwise-nhwc' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into nhwc_workaround

Merge branch 'pointwise-nhwc' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into nhwc_workaround
Merge branch 'pointwise-nhwc' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into nhwc_workaround
f85ba189 · Khalique Ahmed · 122ffe97 · dfbab16e · f85ba189 · f85ba189
Commit f85ba189 authored Jul 18, 2023 by Khalique Ahmed
20 changed files
--- a/src/targets/gpu/include/migraphx/gpu/time_op.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/time_op.hpp
@@ -32,7 +32,7 @@ namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
-std::pair<double, double>
+MIGRAPHX_GPU_EXPORT std::pair<double, double>
 time_op(context& ictx, operation op, const std::vector<shape>& inputs, int n = 100);
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/write_literals.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/write_literals.hpp
@@ -32,7 +32,7 @@ struct module;
 namespace gpu {
-struct write_literals
+struct MIGRAPHX_GPU_EXPORT write_literals
 {
    context* ctx = nullptr;
    std::string name() const { return "gpu::write_literals"; }

--- a/src/targets/gpu/jit/pointwise.cpp
+++ b/src/targets/gpu/jit/pointwise.cpp
@@ -72,7 +72,7 @@ struct pointwise_compiler : compiler<pointwise_compiler>
        hip_compile_options options;
        options.inputs         = inputs;
        options.output         = inputs.back();
-        options.virtual_inputs = reduce_dims(inputs);
+        options.virtual_inputs = reduce_dims(normalize_permutation(inputs));
        options.params         = "-Wno-float-equal";
        auto axis              = find_fast_axis(options.virtual_inputs);
        auto vec               = vectorize::elements(ctx, axis, options.virtual_inputs);

--- a/src/targets/gpu/jit/reduce.cpp
+++ b/src/targets/gpu/jit/reduce.cpp
@@ -84,7 +84,7 @@ static shape get_reduced_shape(const shape& s, const std::vector<T>& axes)
    std::fill(lens.begin(), lens.end(), 1);
    for(const auto& axis : axes)
        lens[axis] = s.lens()[axis];
-    return shape{s.type(), lens};
+    return s.with_lens(lens);
 }
 template <class T>
@@ -93,7 +93,7 @@ static shape get_output_shape(const shape& s, const std::vector<T>& axes)
    auto lens = s.lens();
    for(const auto& axis : axes)
        lens[axis] = 1;
-    return shape{s.type(), lens};
+    return s.with_lens(lens);
 }
 template <class ReduceLens>
@@ -228,7 +228,7 @@ struct fused_reduce_compiler : compiler<fused_reduce_compiler>
        auto virtual_inputs = inputs;
        virtual_inputs.push_back(get_reduced_shape(inputs.front(), axes));
        virtual_inputs.push_back(get_output_shape(inputs.front(), axes));
-        virtual_inputs           = reduce_dims(virtual_inputs);
+        virtual_inputs           = reduce_dims(normalize_permutation(virtual_inputs));
        auto reduce_output_shape = virtual_inputs.back();
        virtual_inputs.pop_back();
        auto reduction_shape = virtual_inputs.back();

--- a/src/targets/gpu/lowering.cpp
+++ b/src/targets/gpu/lowering.cpp
@@ -106,7 +106,7 @@ struct miopen_apply
        add_extend_op("topk");
        add_convolution_op("convolution");
-        add_convolution_op("deconvolution");
+        add_convolution_op("convolution_backwards");
        add_convolution_op("quant_convolution");
        add_gemm_op<op::dot>("dot");
        add_gemm_op<op::quant_dot>("quant_dot");

--- a/src/targets/gpu/rocblas.cpp
+++ b/src/targets/gpu/rocblas.cpp
@@ -55,9 +55,16 @@ bool get_compute_fp32_flag()
 bool get_int8_x4_format(context& ctx)
 {
+#if ROCBLAS_VERSION_MAJOR >= 3
+    (void)(ctx);
+    return false;
+#else
+    // int8x4 packed format is only available starting from rocblas-v2.38 and it is deprecated in
+    // v3.0 and will be removed in v4.0
    rocblas_gemm_flags flag;
    rocblas_query_int8_layout_flag(ctx.get_stream().get_rocblas(), &flag);
    return flag == rocblas_gemm_flags_pack_int8x4;
+#endif
 }
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS

--- a/src/targets/ref/CMakeLists.txt
+++ b/src/targets/ref/CMakeLists.txt
@@ -37,6 +37,8 @@ target_link_libraries(migraphx_ref PUBLIC migraphx)
 target_include_directories(migraphx_ref PRIVATE ${BLAZE_INCLUDE})
 target_compile_definitions(migraphx_ref PRIVATE -DBLAZE_USE_CPP_THREADS)
+migraphx_generate_export_header(migraphx_ref)
 rocm_install_targets(
  TARGETS migraphx_ref
  INCLUDE

--- a/src/targets/ref/include/migraphx/ref/context.hpp
+++ b/src/targets/ref/include/migraphx/ref/context.hpp
@@ -25,6 +25,7 @@
 #define MIGRAPHX_GUARD_RTGLIB_CONTEXT_HPP
 #include <migraphx/config.hpp>
+#include <migraphx/ref/export.h>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {

--- a/src/targets/ref/include/migraphx/ref/lowering.hpp
+++ b/src/targets/ref/include/migraphx/ref/lowering.hpp
@@ -24,14 +24,14 @@
 #ifndef MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
 #define MIGRAPHX_GUARD_RTGLIB_CPU_LOWERING_HPP
+#include <migraphx/ref/context.hpp>
 #include <migraphx/program.hpp>
-#include <migraphx/config.hpp>
 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
 namespace ref {
-struct lowering
+struct MIGRAPHX_REF_EXPORT lowering
 {
    std::string name() const { return "ref::lowering"; }
    void apply(module& m) const;

--- a/src/targets/ref/include/migraphx/ref/target.hpp
+++ b/src/targets/ref/include/migraphx/ref/target.hpp
@@ -35,7 +35,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 struct pass;
 namespace ref {
-struct target
+struct MIGRAPHX_REF_EXPORT target
 {
    std::string name() const;
    std::vector<pass> get_passes(migraphx::context& ctx, const compile_options&) const;

--- a/src/targets/ref/lowering.cpp
+++ b/src/targets/ref/lowering.cpp
@@ -27,7 +27,7 @@
 #include <migraphx/dfor.hpp>
 #include <migraphx/op/identity.hpp>
 #include <migraphx/op/convolution.hpp>
-#include <migraphx/op/deconvolution.hpp>
+#include <migraphx/op/convolution_backwards.hpp>
 #include <migraphx/op/quant_convolution.hpp>
 #include <migraphx/op/dot.hpp>
 #include <migraphx/op/quant_dot.hpp>

--- a/src/tf/CMakeLists.txt
+++ b/src/tf/CMakeLists.txt
@@ -42,8 +42,9 @@ target_compile_options(tf-proto PRIVATE -w)
 target_link_libraries(tf-proto PRIVATE ${PROTOBUF_LIBRARY})
 set_target_properties(tf-proto PROPERTIES POSITION_INDEPENDENT_CODE On)
-file(GLOB TF_SRCS ${CONFIGURE_DEPENDS} *.cpp)
+file(GLOB TF_SRCS CONFIGURE_DEPENDS *.cpp)
 add_library(migraphx_tf ${TF_SRCS})
+migraphx_generate_export_header(migraphx_tf)
 target_include_directories(migraphx_tf PRIVATE include)
 set_target_properties(migraphx_tf PROPERTIES EXPORT_NAME tf)
 rocm_set_soversion(migraphx_tf ${MIGRAPHX_SO_VERSION})

--- a/test/CMakeLists.txt
+++ b/test/CMakeLists.txt
@@ -112,7 +112,7 @@ function(add_test_executable TEST_NAME)
    target_include_directories(${TEST_NAME} PUBLIC include)
 endfunction(add_test_executable)
-file(GLOB TESTS ${CONFIGURE_DEPENDS} *.cpp)
+file(GLOB TESTS CONFIGURE_DEPENDS *.cpp)
 foreach(TEST ${TESTS})
    get_filename_component(BASE_NAME ${TEST} NAME_WE)
@@ -122,7 +122,7 @@ endforeach()
 if(MIGRAPHX_ENABLE_GPU)
    # gpu tests
-    file(GLOB GPU_TESTS ${CONFIGURE_DEPENDS} gpu/*.cpp)
+    file(GLOB GPU_TESTS CONFIGURE_DEPENDS gpu/*.cpp)
    foreach(TEST ${GPU_TESTS})
        get_filename_component(BASE_NAME ${TEST} NAME_WE)
@@ -141,7 +141,7 @@ endif()
 if(MIGRAPHX_ENABLE_FPGA)
    # fpga tests
-    file(GLOB FPGA_TESTS ${CONFIGURE_DEPENDS} fpga/*.cpp)
+    file(GLOB FPGA_TESTS CONFIGURE_DEPENDS fpga/*.cpp)
    foreach(TEST ${FPGA_TESTS})
        get_filename_component(BASE_NAME ${TEST} NAME_WE)
@@ -191,7 +191,7 @@ endif()
 # multitarget test
 if(MIGRAPHX_ENABLE_GPU AND MIGRAPHX_ENABLE_CPU AND MIGRAPHX_ENABLE_FPGA)
    set(TEST_MULTI_TARGET_DIR ${CMAKE_CURRENT_SOURCE_DIR}/multi_target)
-    file(GLOB MULTI_TARGET_TESTS ${CONFIGURE_DEPENDS} ${TEST_MULTI_TARGET_DIR}/*.cpp)
+    file(GLOB MULTI_TARGET_TESTS CONFIGURE_DEPENDS ${TEST_MULTI_TARGET_DIR}/*.cpp)
    foreach(MULTI_TARGET_TEST ${MULTI_TARGET_TESTS})
        get_filename_component(BASE_NAME ${MULTI_TARGET_TEST} NAME_WE)
@@ -221,14 +221,14 @@ function(test_header NAME HEADER)
 endfunction()
 function(test_headers PREFIX)
-    file(GLOB HEADERS ${CONFIGURE_DEPENDS} ${ARGN})
+    file(GLOB HEADERS CONFIGURE_DEPENDS ${ARGN})
    foreach(HEADER ${HEADERS})
        file(RELATIVE_PATH HEADER_REL ${CMAKE_SOURCE_DIR} ${HEADER})
        string(MAKE_C_IDENTIFIER ${HEADER_REL} TEST_NAME)
        get_filename_component(BASE_NAME ${HEADER} NAME_WE)
        test_header(header_${TEST_NAME} ${PREFIX}/${BASE_NAME}.hpp)
-        target_link_libraries(header_${TEST_NAME} migraphx_all_targets)
+        target_link_libraries(header_${TEST_NAME} migraphx migraphx_onnx migraphx_tf migraphx_all_targets)
    endforeach()
 endfunction()

--- a/test/onnx/.onnxrt-commit
+++ b/test/onnx/.onnxrt-commit
-fbf08c4b4dce5da245189203d9f6cfc41f6663a2
+3be6eb53c8b359703cb645ed2cb1cdf106924b7c
--- a/test/onnx/conv_transpose_auto_pad_test.onnx
+++ b/test/onnx/conv_transpose_auto_pad_test.onnx
+conv_transpose_auto_pad_test:±
+:
+x
+wyconv1"
ConvTranspose*
+auto_pad"
+SAME_UPPER conv_transpose_auto_pad_testZ
+x
+Z
+w
+b
+y
+B
\ No newline at end of file
--- a/test/onnx/deconv_bias_test.onnx
+++ b/test/onnx/deconv_bias_test.onnx
-deconv_bias_test:ž
+conv_transpose_bias_test:¦
 "
 x
 w
-byconv1"
ConvTransposedeconv_bias_testZ
+byconv1"
ConvTransposeconv_transpose_bias_testZ
 x
@@ -24,4 +24,4 @@
 B
\ No newline at end of file
--- a/test/onnx/conv_transpose_dyn_asym_padding_test.onnx
+++ b/test/onnx/conv_transpose_dyn_asym_padding_test.onnx
--- a/test/onnx/conv_transpose_dyn_batch_test.onnx
+++ b/test/onnx/conv_transpose_dyn_batch_test.onnx
--- a/test/onnx/conv_transpose_dyn_img_test.onnx
+++ b/test/onnx/conv_transpose_dyn_img_test.onnx
--- a/test/onnx/conv_transpose_dyn_output_shape_test.onnx
+++ b/test/onnx/conv_transpose_dyn_output_shape_test.onnx