Merge branch 'develop' into blas_tuning

f3a8933c · Paul · ca300bd6 · b249fb8a · f3a8933c · f3a8933c
Commit f3a8933c authored Nov 02, 2023 by Paul
20 changed files
--- a/src/targets/gpu/argmin.cpp
+++ b/src/targets/gpu/argmin.cpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -40,7 +40,8 @@ argument hip_argmin::compute(context& ctx, const shape&, const std::vector<argum
 {
    auto n_dim         = args.front().get_shape().lens().size();
    int64_t tuned_axis = tune_axis(n_dim, op.axis, op.name());
-    device::argmin(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
+    device::argmin(
+        ctx.get_stream().get(), args.back(), args.front(), tuned_axis, op.select_last_index);
    return args.back();
 }


--- a/src/targets/gpu/compile_miopen.cpp
+++ b/src/targets/gpu/compile_miopen.cpp
@@ -60,9 +60,8 @@ struct miopen_op
 };
 MIGRAPHX_REGISTER_OP(miopen_op);

-std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool format) const
+std::size_t compile_miopen::compile(operation& op, instruction_ref ins) const
 {
-    op.from_value({{"int8_x4_format", format}});
    auto v = op.compile(*ctx, ins->get_shape(), to_shapes(ins->inputs()));
    return v.get<std::size_t>("workspace", 0);
 }
@@ -70,25 +69,15 @@ std::size_t compile_miopen::compile(operation& op, instruction_ref ins, bool for
 void compile_miopen::apply(module& m) const
 {
    assert(ctx);
-    const bool int8_x4_format = get_int8_x4_format(any_cast<migraphx::gpu::context>(*ctx));
    for(auto ins : iterator_for(m))
    {
        if(ins->name() != "gpu::miopen_op")
            continue;
        auto op        = any_cast<miopen_op>(ins->get_operator()).op;
        std::size_t ws = 0;
-        try
-        {
-            // for the regular convolution and convolution_backwards, this try would always succeed
-            ws = compile(op, ins, int8_x4_format);
-        }
-        catch(migraphx::exception&)
-        {
-            // In case no solver supports the default format, retry using the other format.
-            ws = compile(op, ins, not int8_x4_format);
-        }
-        auto inputs = ins->inputs();
-        auto alloc  = m.insert_instruction(
+        ws             = compile(op, ins);
+        auto inputs    = ins->inputs();
+        auto alloc     = m.insert_instruction(
            ins, make_op("allocate", {{"shape", to_value(shape{shape::int8_type, {ws}})}}));
        inputs.insert(std::prev(inputs.end()), alloc);


--- a/src/targets/gpu/compile_ops.cpp
+++ b/src/targets/gpu/compile_ops.cpp
@@ -37,6 +37,7 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {

 MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_GPU_COMPILE_PARALLEL);
+MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_TRACE_BENCHMARKING);

 struct precompile_op
 {
@@ -179,14 +180,29 @@ struct compile_plan
            MIGRAPHX_THROW("Multiple kernels without config");
        std::cout << "Benchmarking " << preop.name() << ": " << results.size() << " configs"
                  << std::endl;
+        if(enabled(MIGRAPHX_TRACE_BENCHMARKING{}))
+            std::cout << "Problem: " << config->problem << std::endl;
        std::vector<double> times;
        times.reserve(results.size());
-        std::transform(
-            results.begin(), results.end(), std::back_inserter(times), [&](const auto& cr) {
-                if(not cr.has_value())
-                    return std::numeric_limits<double>::max();
-                return time_op(*ctx, cr->replace.code_object, to_shapes(cr->ins->inputs()), 20);
-            });
+        std::transform(results.begin(),
+                       results.end(),
+                       config->solutions.begin(),
+                       std::back_inserter(times),
+                       [&](const auto& cr, const auto& solution) {
+                           if(enabled(MIGRAPHX_TRACE_BENCHMARKING{}))
+                               std::cout << "Benchmarking solution: " << solution << std::endl;
+                           if(not cr.has_value())
+                           {
+                               if(enabled(MIGRAPHX_TRACE_BENCHMARKING{}))
+                                   std::cout << "No binary" << std::endl;
+                               return std::numeric_limits<double>::max();
+                           }
+                           auto t = time_op(
+                               *ctx, cr->replace.code_object, to_shapes(cr->ins->inputs()), 20);
+                           if(enabled(MIGRAPHX_TRACE_BENCHMARKING{}))
+                               std::cout << t << "ms" << std::endl;
+                           return t;
+                       });
        auto i = std::distance(times.begin(), std::min_element(times.begin(), times.end()));
        std::cout << "Fastest solution: " << config->solutions.at(i) << std::endl;
        pc.insert(preop.name(), config->problem, config->solutions.at(i));

--- a/src/targets/gpu/device/argmax.cpp
+++ b/src/targets/gpu/device/argmax.cpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -34,9 +34,16 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void argmax(hipStream_t stream, const argument& result, const argument& arg, int64_t axis)
+void argmax(hipStream_t stream,
+            const argument& result,
+            const argument& arg,
+            int64_t axis,
+            bool select_last_index)
 {
-    arg_op(argmax_op{}, stream, result, arg, axis);
+    if(select_last_index)
+        arg_op(argmax_op_last_index{}, stream, result, arg, axis);
+    else
+        arg_op(argmax_op_first_index{}, stream, result, arg, axis);
 }

 } // namespace device

--- a/src/targets/gpu/device/argmin.cpp
+++ b/src/targets/gpu/device/argmin.cpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -34,9 +34,16 @@ inline namespace MIGRAPHX_INLINE_NS {
 namespace gpu {
 namespace device {

-void argmin(hipStream_t stream, const argument& result, const argument& arg, int64_t axis)
+void argmin(hipStream_t stream,
+            const argument& result,
+            const argument& arg,
+            int64_t axis,
+            bool select_last_index)
 {
-    arg_op(argmin_op{}, stream, result, arg, axis);
+    if(select_last_index)
+        arg_op(argmin_op_last_index{}, stream, result, arg, axis);
+    else
+        arg_op(argmin_op_first_index{}, stream, result, arg, axis);
 }

 } // namespace device

--- a/src/targets/gpu/device/int8_gemm_pack.cpp
+++ b/src/targets/gpu/device/int8_gemm_pack.cpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include <migraphx/shape.hpp>
-#include <migraphx/argument.hpp>
-#include <migraphx/gpu/device/int8_gemm_pack.hpp>
-#include <migraphx/gpu/device/launch.hpp>
-#include <migraphx/gpu/device/types.hpp>
-#include <migraphx/gpu/device/tensor.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-namespace device {
-
-void int8_gemm_pack_a(hipStream_t stream, const argument& result, const argument& arg)
-{
-    auto comp_shape    = arg.get_shape();
-    auto out_lens      = comp_shape.lens();
-    auto dim_0         = out_lens.size() - 2;
-    auto dim_1         = out_lens.size() - 1;
-    std::size_t lda    = comp_shape.strides()[dim_0];
-    std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
-    visit_all(result, arg)([&](auto output, auto input) {
-        std::size_t nelements = comp_shape.elements();
-        auto* out_ptr         = device_cast(output.data());
-        auto* in_ptr          = device_cast(input.data());
-        visit_tensor_size(out_lens.size(), [&](auto out_dim) {
-            hip_tensor_descriptor<out_dim> desc(comp_shape);
-            gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
-                const size_t nb    = 4;
-                auto idx           = desc.multi(ii);
-                std::size_t i_m    = idx[dim_1];
-                std::size_t i_k    = idx[dim_0];
-                std::size_t offset = ii / m_size * m_size;
-                out_ptr[i_k % nb + (i_m + (i_k / nb) * lda) * nb + offset] =
-                    in_ptr[i_m + i_k * lda + offset];
-            });
-        });
-    });
-}
-
-void int8_gemm_pack_b(hipStream_t stream, const argument& result, const argument& arg)
-{
-    auto trans_shape = arg.get_shape();
-    auto out_lens    = trans_shape.lens();
-    auto dim_0       = trans_shape.lens().size() - 2;
-    auto dim_1       = trans_shape.lens().size() - 1;
-    std::size_t ldb  = trans_shape.strides()[dim_1];
-
-    auto wrap_lens = out_lens;
-    std::swap(wrap_lens[dim_0], wrap_lens[dim_1]);
-    shape comp_shape{trans_shape.type(), wrap_lens};
-    std::size_t m_size = out_lens[dim_0] * out_lens[dim_1];
-    visit_all(result, arg)([&](auto output, auto input) {
-        std::size_t nelements = comp_shape.elements();
-        auto* out_ptr         = device_cast(output.data());
-        auto* in_ptr          = device_cast(input.data());
-        visit_tensor_size(out_lens.size(), [&](auto out_dim) {
-            hip_tensor_descriptor<out_dim> desc(comp_shape);
-            gs_launch(stream, nelements, 256)([=](auto ii) __device__ {
-                const size_t nb    = 4;
-                auto idx           = desc.multi(ii);
-                std::size_t i_n    = idx[dim_1];
-                std::size_t i_k    = idx[dim_0];
-                std::size_t offset = ii / m_size * m_size;
-                out_ptr[i_k % nb + (i_n + (i_k / nb) * ldb) * nb + offset] =
-                    in_ptr[i_n + i_k * ldb + offset];
-            });
-        });
-    });
-}
-
-} // namespace device
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
--- a/src/targets/gpu/fuse_ck.cpp
+++ b/src/targets/gpu/fuse_ck.cpp
@@ -26,6 +26,7 @@
 #include <migraphx/matcher.hpp>
 #include <migraphx/pass_manager.hpp>
 #include <migraphx/register_op.hpp>
+#include <migraphx/gpu/device_name.hpp>

 namespace migraphx {
 inline namespace MIGRAPHX_INLINE_NS {
@@ -92,6 +93,8 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
    auto m = a.lens()[a.lens().size() - 2];
    auto n = b.lens().back();
    auto k = a.lens().back();
+    auto batch_size = std::accumulate(
+        a.lens().rbegin() + 2, a.lens().rend(), std::size_t{1}, std::multiplies<std::size_t>());
    // Integer gemms must be divisible by 4 in ck
    if(contains({shape::int8_type, shape::int32_type}, ins->get_shape().type()))
    {
@@ -102,9 +105,17 @@ MIGRAPHX_PRED_MATCHER(is_ck_gemm, instruction_ref ins)
        if(k % 4 != 0)
            return false;
    }
-    // Skipping GEMMs with a K dimension greater than 2048 is a course-grained strategy
-    // to avoid poor-performing GEMM kernels from CK
-    // To-do: Investigate a more precise strategy
+    auto device_name = trim(split_string(get_device_name(), ':').front());
+    if(device_name == "gfx940")
+    {
+        if(ins->get_shape().type() == shape::half_type)
+        {
+            if(batch_size >= 64)
+                return m < 2048 or k <= 64 or n <= 384 or n >= 2048;
+            return true;
+        }
+        return true;
+    }
    return k <= 2048;
 }

@@ -140,6 +151,10 @@ struct find_ck_gemm_pointwise
               return not input->inputs().empty() and input->inputs().front()->name() == "capture";
           }))
            return;
+        if(std::any_of(ins->inputs().begin(), ins->inputs().end(), [](auto input) {
+               return not input->inputs().empty() and input->inputs().front()->name() == "capture";
+           }))
+            return;
        assert(gemm_it != inputs.end());
        if(gemm_idx != 0)
        {

--- a/src/targets/gpu/fuse_mlir.cpp
+++ b/src/targets/gpu/fuse_mlir.cpp
@@ -361,7 +361,8 @@ struct find_mlir_standalone_op
            return;

        static size_t counter = 0;
-        module_ref mm         = mpm.create_module("mlir_" + std::to_string(counter++));
+        module_ref mm =
+            mpm.create_module("mlir_" + conv_based_op->name() + std::to_string(counter++));
        mm->set_bypass();
        auto [anchor_op, top_inputs] = fuse_input_ops_and_gemm_based_op(mm, conv_based_op);
        mm->add_return({anchor_op});

--- a/src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/compile_miopen.hpp
@@ -42,7 +42,7 @@ struct compile_miopen
    context* ctx = nullptr;
    std::string name() const { return "gpu::compile_miopen"; }
    void apply(module& m) const;
-    std::size_t compile(operation& op, instruction_ref ins, bool format) const;
+    std::size_t compile(operation& op, instruction_ref ins) const;
 };

 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/convolution.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/convolution.hpp
@@ -57,7 +57,6 @@ template <class Op>
 struct miopen_convolution
 {
    Op op;
-    bool int8_x4_format               = false;
    shared<convolution_descriptor> cd = nullptr;
    miopenConvFwdAlgorithm_t algo{};
 #ifdef MIGRAPHX_HAS_FIND_2_API
@@ -74,7 +73,6 @@ struct miopen_convolution
                    f(self.solution_object, "solution_object"),
 #endif
                    f(self.algo, "algo"),
-                    f(self.int8_x4_format, "int8_x4_format"),
                    f(self.solution_id, "solution_id"));
    }

@@ -94,9 +92,9 @@ struct miopen_convolution
    argument
    compute(context& ctx, const shape& output_shape, const std::vector<argument>& args) const
    {
-        auto x_desc = make_tensor(reshape_if_1d(args[0].get_shape()), int8_x4_format);
-        auto w_desc = make_tensor(reshape_if_1d(args[1].get_shape()), int8_x4_format);
-        auto y_desc = make_tensor(reshape_if_1d(output_shape));
+        auto x_desc                = make_tensor(reshape_if_1d(args[0].get_shape()));
+        auto w_desc                = make_tensor(reshape_if_1d(args[1].get_shape()));
+        auto y_desc                = make_tensor(reshape_if_1d(output_shape));
        auto* miopen_stream_handle = ctx.get_stream().get_miopen();
        auto workspace_size        = args[2].get_shape().bytes();

@@ -162,8 +160,8 @@ struct miopen_convolution
    shape find(context& ctx, const shape& output_shape, const std::vector<shape>& inputs)
    {
        shape workspace_shape{};
-        auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
-        auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
+        auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
+        auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
        auto y_desc = make_tensor(reshape_if_1d(output_shape));

        auto* miopen_stream_handle = ctx.get_stream().get_miopen();
@@ -179,13 +177,8 @@ struct miopen_convolution

        workspace_shape = shape{shape::int8_type, {workspace_size}};

-        auto x_shape = inputs[0];
-        auto w_shape = inputs[1];
-        if(int8_x4_format)
-        {
-            x_shape = pack_int8_shape(x_shape);
-            w_shape = pack_int8_shape(w_shape);
-        }
+        const auto& x_shape = inputs[0];
+        const auto& w_shape = inputs[1];

 #ifdef MIGRAPHX_HAS_FIND_2_API
        {
@@ -327,8 +320,8 @@ struct miopen_convolution
                                   ": workspace has changed during finalization.");
            }

-            auto x_desc = make_tensor(reshape_if_1d(inputs[0]), int8_x4_format);
-            auto w_desc = make_tensor(reshape_if_1d(inputs[1]), int8_x4_format);
+            auto x_desc = make_tensor(reshape_if_1d(inputs[0]));
+            auto w_desc = make_tensor(reshape_if_1d(inputs[1]));
            auto y_desc = make_tensor(reshape_if_1d(output_shape));

            auto status = miopenConvolutionForwardCompileSolution(ctx.get_stream().get_miopen(),
@@ -347,21 +340,6 @@ struct miopen_convolution
    {
        return shapes.size() - 1;
    }
-
-    inline shape pack_int8_shape(const shape& s) const
-    {
-        if(s.type() != shape::int8_type)
-        {
-            return s;
-        }
-
-        auto lens    = s.lens();
-        auto strides = s.strides();
-        lens[1]      = (lens[1] + 3) / 4 * 4;
-        strides[0]   = strides[1] * lens[1];
-
-        return {s.type(), lens, strides};
-    }
 };

 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/arg_op.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -55,7 +55,7 @@ MIGRAPHX_DEVICE_CONSTEXPR val_index<T> make_val_index(T v, int64_t i)
    return {v, i};
 }

-struct argmax_op
+struct argmax_op_first_index
 {
    template <class T>
    MIGRAPHX_DEVICE_CONSTEXPR val_index<T> operator()(val_index<T> x, val_index<T> y) const
@@ -73,7 +73,25 @@ struct argmax_op
    MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return lowest(); }
 };

-struct argmin_op
+struct argmax_op_last_index
+{
+    template <class T>
+    MIGRAPHX_DEVICE_CONSTEXPR val_index<T> operator()(val_index<T> x, val_index<T> y) const
+    {
+        if(x.val > y.val)
+            return x;
+        else if(x.val < y.val)
+            return y;
+        else
+        {
+            return (x.index > y.index) ? x : y;
+        }
+    }
+
+    MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return lowest(); }
+};
+
+struct argmin_op_first_index
 {
    template <class T>
    MIGRAPHX_DEVICE_CONSTEXPR val_index<T> operator()(val_index<T> x, val_index<T> y) const
@@ -91,6 +109,24 @@ struct argmin_op
    MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return highest(); }
 };

+struct argmin_op_last_index
+{
+    template <class T>
+    MIGRAPHX_DEVICE_CONSTEXPR val_index<T> operator()(val_index<T> x, val_index<T> y) const
+    {
+        if(x.val < y.val)
+            return x;
+        else if(x.val > y.val)
+            return y;
+        else
+        {
+            return (x.index > y.index) ? x : y;
+        }
+    }
+
+    MIGRAPHX_DEVICE_CONSTEXPR auto init() const { return highest(); }
+};
+
 template <class Op>
 void arg_op(Op op, hipStream_t stream, const argument& result, const argument& arg, int64_t axis)
 {

--- a/src/targets/gpu/include/migraphx/gpu/device/argmax.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/argmax.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -36,7 +36,8 @@ namespace device {
 void MIGRAPHX_DEVICE_EXPORT argmax(hipStream_t stream,
                                   const argument& result,
                                   const argument& arg,
-                                   int64_t axis);
+                                   int64_t axis,
+                                   bool select_last_index);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/argmin.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/argmin.hpp
 /*
 * The MIT License (MIT)
 *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
+ * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
@@ -36,7 +36,8 @@ namespace device {
 void MIGRAPHX_DEVICE_EXPORT argmin(hipStream_t stream,
                                   const argument& result,
                                   const argument& arg,
-                                   int64_t axis);
+                                   int64_t axis,
+                                   bool select_last_index);

 } // namespace device
 } // namespace gpu

--- a/src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/device/int8_gemm_pack.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
-#define MIGRAPHX_GUARD_RTGLIB_DEVICE_INT8_GEMM_PACK_HPP
-
-#include <migraphx/argument.hpp>
-#include <migraphx/gpu/device/config.hpp>
-#include <hip/hip_runtime_api.h>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-namespace device {
-
-void MIGRAPHX_DEVICE_EXPORT int8_gemm_pack_a(hipStream_t stream,
-                                             const argument& result,
-                                             const argument& arg);
-
-void MIGRAPHX_DEVICE_EXPORT int8_gemm_pack_b(hipStream_t stream,
-                                             const argument& result,
-                                             const argument& arg);
-
-} // namespace device
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/gemm.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/gemm.hpp
@@ -49,7 +49,6 @@ struct rocblas_gemm
    Op op;
    float alpha          = 1;
    float beta           = 0;
-    bool int8_x4_format  = true;
    bool compute_fp32    = false;
    unsigned trans_batch = 0;
    int32_t solution_idx = 0;
@@ -60,7 +59,6 @@ struct rocblas_gemm
        return pack_join(migraphx::reflect(self.op, f),
                         pack(f(self.alpha, "alpha"),
                              f(self.beta, "beta"),
-                              f(self.int8_x4_format, "int8_x4_format"),
                              f(self.compute_fp32, "compute_fp32"),
                              f(self.trans_batch, "trans_batch"),
                              f(self.solution_idx, "solution_idx")));

--- a/src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/int8_conv_pack.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
-#define MIGRAPHX_GUARD_RTGLIB_INT8_CONV_PACK_HPP
-
-#include <migraphx/argument.hpp>
-#include <migraphx/config.hpp>
-#include <utility>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-struct context;
-
-struct miopen_int8_conv_pack
-{
-    std::string name() const { return "gpu::int8_conv_pack"; }
-    shape compute_shape(const std::vector<shape>& inputs) const;
-    argument compute(context& ctx, const shape&, const std::vector<argument>& args) const;
-    std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
-    {
-        return shapes.size() - 1;
-    }
-};
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/miopen.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/miopen.hpp
@@ -127,7 +127,7 @@ inline void set_tensor_descriptor(miopenTensorArgumentId_t name,
 }
 #endif

-inline tensor_descriptor make_tensor(const migraphx::shape& os, bool pack = false)
+inline tensor_descriptor make_tensor(const migraphx::shape& os)
 {
    auto s = os.normalize_standard();
    auto t = make_obj<tensor_descriptor>(&miopenCreateTensorDescriptor);
@@ -142,23 +142,9 @@ inline tensor_descriptor make_tensor(const migraphx::shape& os, bool pack = fals
    else if(s.type() == shape::int32_type)
        d = miopenInt32;
    else if(s.type() == shape::int8_type)
-    {
-        if(pack)
-        {
-            // update the lens and corresponding strides
-            d          = miopenInt8x4;
-            lens[1]    = ((lens[1] + 3) / 4) * 4;
-            strides[0] = strides[1] * lens[1];
-        }
-        else
-        {
-            d = miopenInt8;
-        }
-    }
+        d = miopenInt8;
    else
-    {
        MIGRAPHX_THROW("MAKE_TENSOR: unsupported type");
-    }
    miopenSetTensorDescriptor(t.get(), d, s.lens().size(), lens.data(), strides.data());

    return t;

--- a/src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/pack_int8_args.hpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#ifndef MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
-#define MIGRAPHX_GUARD_RTGLIB_PACK_INT8_ARGS_HPP
-
-#include <migraphx/program.hpp>
-#include <migraphx/gpu/context.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-
-namespace gpu {
-
-struct MIGRAPHX_GPU_EXPORT pack_int8_args
-{
-    std::string name() const { return "gpu::pack_int8_args"; }
-    void apply(module& m) const;
-    shape pack_int8_shape(const shape& s) const;
-};
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx
-
-#endif
--- a/src/targets/gpu/include/migraphx/gpu/rocblas.hpp
+++ b/src/targets/gpu/include/migraphx/gpu/rocblas.hpp
@@ -41,8 +41,6 @@ struct context;

 MIGRAPHX_GPU_EXPORT bool get_compute_fp32_flag();

-MIGRAPHX_GPU_EXPORT bool get_int8_x4_format(context& ctx);
-
 } // namespace gpu
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace migraphx

--- a/src/targets/gpu/int8_conv_pack.cpp
+++ b/src/targets/gpu/int8_conv_pack.cpp
-/*
- * The MIT License (MIT)
- *
- * Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to deal
- * in the Software without restriction, including without limitation the rights
- * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- * copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in
- * all copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
- * THE SOFTWARE.
- */
-#include <migraphx/gpu/int8_conv_pack.hpp>
-#include <migraphx/gpu/context.hpp>
-
-namespace migraphx {
-inline namespace MIGRAPHX_INLINE_NS {
-namespace gpu {
-
-shape pack_int8_shape(const shape& s)
-{
-    if(s.type() != shape::int8_type)
-    {
-        MIGRAPHX_THROW("PACK_INT8_ARGS: only process int8_type");
-    }
-
-    auto lens    = s.lens();
-    auto strides = s.strides();
-    lens[1]      = (lens[1] + 3) / 4 * 4;
-    strides[0]   = strides[1] * lens[1];
-
-    return {s.type(), lens, strides};
-}
-
-shape miopen_int8_conv_pack::compute_shape(const std::vector<shape>& inputs) const
-{
-    check_shapes{{inputs.at(0)}, *this}.has(1).standard();
-    return pack_int8_shape(inputs.at(0));
-}
-
-argument
-miopen_int8_conv_pack::compute(context& ctx, const shape&, const std::vector<argument>& args) const
-{
-    auto arg_desc      = make_tensor(args[0].get_shape());
-    auto arg_desc_vec4 = make_tensor(args[0].get_shape(), true);
-
-    float alpha = 1;
-    float beta  = 0;
-    // pack input to vec4 format
-    auto status = miopenTransformTensor(ctx.get_stream().get_miopen(),
-                                        &alpha,
-                                        arg_desc.get(),
-                                        args[0].implicit(),
-                                        &beta,
-                                        arg_desc_vec4.get(),
-                                        args[1].implicit());
-    if(status != miopenStatusSuccess)
-    {
-        MIGRAPHX_THROW("INT8_CONV_PACK: transform input tensor failed");
-    }
-
-    return args[1];
-}
-
-} // namespace gpu
-} // namespace MIGRAPHX_INLINE_NS
-} // namespace migraphx