Commit f3a8933c authored by Paul

Merge branch 'develop' into blas_tuning

parents ca300bd6 b249fb8a
......@@ -2,6 +2,49 @@
Full documentation for MIGraphX is available at [MIGraphX Documentation](https://rocmdocs.amd.com/projects/AMDMIGraphX/en/latest/).
## MIGraphX 2.8 for ROCm 6.0.0
### Added
- Support for MI300 GPUs
- Support for TorchMIGraphX via PyTorch
- Boosted overall performance by integrating rocMLIR
- INT8 support for ONNX Runtime
- Support for ONNX version 1.14.1
- Added the QLinearAdd, QLinearGlobalAveragePool, QLinearConv, Shrink, CastLike, and RandomUniform operators
- Added an error message when gpu_targets is not set while compiling MIGraphX
- Added a parameter to set tolerances with migraphx-driver verify
- Added support for MXR files >4 GB
- Added MIGRAPHX_TRACE_MLIR flag
- (Beta) Added the capability to use ROCm Composable Kernels via the environment variable MIGRAPHX_ENABLE_CK=1
### Optimizations
- Improved INT8 performance
- Improved timing precision while benchmarking candidate kernels from CK or MLIR
- Removed contiguous from reshape parsing
- Updated the ConstantOfShape operator to support dynamic batch sizes
- Simplified dynamic shape-related operators to their static versions where possible
- Improved debugging tools for accuracy issues
- Added a warning about miopen_fusion when generating MXR files
- General reduction in system memory usage during model compilation
- Created additional fusion opportunities during model compilation
- Improved debugging for matchers
- Improved general debug messages
### Fixed
- Fixed the scatter operator for nonstandard shapes in some ONNX Model Zoo models
- Provided a compile option to improve accuracy of some models by disabling Fast-Math
- Improved layernorm + pointwise fusion matching to ignore argument order
- Fixed accuracy issue with ROIAlign operator
- Fixed Trilu operator computation logic
- Fixed support for the DETR model
### Changed
- Changed the MIGraphX version to 2.8
- Extracted the test package into its own separate .deb file when building MIGraphX from source
### Removed
- Removed building Python 2.7 bindings
## MIGraphX 2.7 for ROCm 5.7.0
### Added
- Enabled hipRTC so that dev packages are not required by the MIGraphX runtime, and allowed the ROCm installation to be in a different directory than it was at build time
......
......@@ -57,6 +57,12 @@ else()
option(MIGRAPHX_ENABLE_PYTHON "Enable python bindings" ON)
endif()
if(WIN32) # CK is not yet ported to Windows
option(MIGRAPHX_USE_COMPOSABLEKERNEL "Enable MIGraphX to use composable kernel JIT library" OFF)
else()
option(MIGRAPHX_USE_COMPOSABLEKERNEL "Enable MIGraphX to use composable kernel JIT library" ON)
endif()
find_path(HALF_INCLUDE_DIR half.hpp PATH_SUFFIXES half)
if (NOT HALF_INCLUDE_DIR)
message(FATAL_ERROR "Could not find half.hpp - Please check that the install path of half.hpp has been added to CMAKE_PREFIX_PATH")
......@@ -75,8 +81,9 @@ include(ROCMSetupVersion)
option(BUILD_DEV "Build for development purpose only" OFF)
rocm_setup_version(VERSION 2.8.0)
set(MIGRAPHX_SO_VERSION ${PROJECT_VERSION_MAJOR}.${PROJECT_VERSION_MINOR}.${PROJECT_VERSION_PATCH})
rocm_setup_version(VERSION 2.9.0)
math(EXPR MIGRAPHX_SO_MAJOR_VERSION "(${PROJECT_VERSION_MAJOR} * 1000 * 1000) + (${PROJECT_VERSION_MINOR} * 1000) + ${PROJECT_VERSION_PATCH}")
set(MIGRAPHX_SO_VERSION ${MIGRAPHX_SO_MAJOR_VERSION}.0)
option( BUILD_SHARED_LIBS "Build as a shared library" ON )
......
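For reference, the math(EXPR ...) expression above packs the semantic version into a single monotonically increasing SO major number: with VERSION 2.9.0 this is (2 * 1000 * 1000) + (9 * 1000) + 0 = 2009000, so MIGRAPHX_SO_VERSION becomes 2009000.0, presumably so that library SO names stay strictly ordered across releases.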
......@@ -30,7 +30,7 @@ def rocmtestnode(Map conf) {
rm -rf build
mkdir build
cd build
cmake -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DBUILD_DEV=On -DCMAKE_EXECUTE_PROCESS_COMMAND_ECHO=STDOUT ${flags} ..
cmake -DCMAKE_C_COMPILER_LAUNCHER=ccache -DCMAKE_CXX_COMPILER_LAUNCHER=ccache -DBUILD_DEV=On -DCMAKE_EXECUTE_PROCESS_COMMAND_ECHO=STDOUT -DMIGRAPHX_DISABLE_VIRTUAL_ENV=ON ${flags} ..
git diff
git diff-index --quiet HEAD || (echo "Git repo is not clean after running cmake." && exit 1)
make -j\$(nproc) generate VERBOSE=1
......
......@@ -32,7 +32,7 @@
#define MIGRAPHX_MIOPEN_ASSERT(x) (assert((x) == miopenStatusSuccess))
#define MIGRAPHX_HIP_ASSERT(x) (assert((x) == hipSuccess))
inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s, bool pack = false)
inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s)
{
miopenTensorDescriptor_t t;
MIGRAPHX_MIOPEN_ASSERT(miopenCreateTensorDescriptor(&t));
......@@ -49,23 +49,9 @@ inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s, boo
else if(s.type() == migraphx_shape_int32_type)
d = miopenInt32;
else if(s.type() == migraphx_shape_int8_type)
{
if(pack)
{
// update the lens and corresponding strides
d = miopenInt8x4;
lens[1] = ((lens[1] + 3) / 4) * 4;
strides[0] = strides[1] * lens[1];
}
else
{
d = miopenInt8;
}
}
d = miopenInt8;
else
{
throw("MAKE_TENSOR: unsupported type");
}
miopenSetTensorDescriptor(t, d, s_lens.size(), lens.data(), strides.data());
return t;
}
......
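The deleted miopenInt8x4 branch padded lens[1] up to a multiple of four before recomputing strides[0]. A minimal standalone sketch of that round-up idiom (illustrative only, not MIGraphX code):

```cpp
#include <cstddef>
#include <iostream>

// Round a channel count up to the next multiple of 4, as the removed
// miopenInt8x4 packing did via ((lens[1] + 3) / 4) * 4.
std::size_t round_up4(std::size_t n) { return ((n + 3) / 4) * 4; }

int main()
{
    std::cout << round_up4(5) << '\n'; // 8
    std::cout << round_up4(8) << '\n'; // 8
}
```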
......@@ -149,9 +149,6 @@ gpu::gelu
gpu::gelu_new
gpu::gemm
gpu::greater
gpu::int8_conv_pack
gpu::int8_gemm_pack_a
gpu::int8_gemm_pack_b
gpu::layernorm
gpu::leaky_relu
gpu::less
......
......@@ -21,12 +21,12 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
google/protobuf@v3.11.0 -DCMAKE_POSITION_INDEPENDENT_CODE=On -X subdir -Dprotobuf_BUILD_TESTS=Off
google/protobuf@v3.19.0 -DCMAKE_POSITION_INDEPENDENT_CODE=On -X subdir -Dprotobuf_BUILD_TESTS=Off
nlohmann/json@v3.8.0
live-clones/blaze@v3.8 -X header -DHEADER_DIR=blaze -H sha256:d0ff011f47538285178908ea5f2cab46bb6a8f55b1edb6e03224a82dbc1a3212
ROCmSoftwarePlatform/half@rocm-5.6.0
pybind/pybind11@d159a563383d10c821ba7b2a71905d1207db6de4 --build
msgpack/msgpack-c@cpp-3.3.0 -DMSGPACK_BUILD_TESTS=Off
sqlite3@3.17 -DCMAKE_POSITION_INDEPENDENT_CODE=On
sqlite3@3.43.2 -DCMAKE_POSITION_INDEPENDENT_CODE=On
ROCmSoftwarePlatform/composable_kernel@70eefcf4f263aa5c25f3c9ff0db8f6f199ef0fb9 -DCK_BUILD_JIT_LIB=On -DCMAKE_POSITION_INDEPENDENT_CODE=On
ROCmSoftwarePlatform/rocMLIR@507bb94ce7873786486d296ec81d2eadaab49003 -DBUILD_FAT_LIBROCKCOMPILER=On
\ No newline at end of file
ROCmSoftwarePlatform/rocMLIR@3700afd2564e21267a4d1fd8f1f80465f45daa93 -DBUILD_FAT_LIBROCKCOMPILER=On
......@@ -261,9 +261,8 @@ find_package(nlohmann_json 3.8.0 REQUIRED)
target_link_libraries(migraphx PRIVATE nlohmann_json::nlohmann_json)
migraphx_generate_export_header(migraphx)
find_package(PkgConfig)
pkg_check_modules(SQLITE3 REQUIRED IMPORTED_TARGET sqlite3)
target_link_libraries(migraphx PRIVATE PkgConfig::SQLITE3)
find_package(SQLite3 REQUIRED)
target_link_libraries(migraphx PRIVATE SQLite::SQLite3)
find_package(msgpackc-cxx QUIET)
if(NOT msgpackc-cxx_FOUND)
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -37,20 +37,22 @@ namespace op {
* Static allocate:
* No inputs: `allocate()`
* `this.s` attribute set to the static output shape of the buffer.
* `this.s` attribute can be set to a dynamic output shape; however this will allocate the maximum
* buffer size for that case
*
* Dynamic allocate:
* One input: `allocate(output_dims)`
* `output_dims` are the output buffer dimensions and has a static shape.
* Either `this.s` or `this.buf_type` must be set to calculate the dynamic output shape at compute
* time. If `this.buf_type` is set, the compute_shape() of allocate at compile time will have
* dynamic_dimensions from {0, max_int} with rank = output_dims.ndim(). If `this.s` is set then the
* compute_shape() will output `this.s`; `this.s` should be a dynamic shape.
* Either `this.s` or `this.buf_type` (but not both) must be set to calculate the dynamic output
* shape at compute time. If `this.buf_type` is set, the compute_shape() of allocate at compile time
* will have dynamic_dimensions from {0, max_int} with rank = output_dims.ndim(). If `this.s` is set
* then the compute_shape() will output `this.s`; `this.s` should be a dynamic shape.
*/
struct allocate
{
shape s{};
optional<shape> s;
// for dynamic allocate to set the buffer type
shape::type_t buf_type = shape::half_type;
optional<shape::type_t> buf_type;
template <class Self, class F>
static auto reflect(Self& self, F f)
......@@ -62,8 +64,12 @@ struct allocate
shape compute_shape(const std::vector<shape>& inputs) const
{
if(s != shape())
if(s.has_value())
{
if(buf_type.has_value())
{
MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both set");
}
if(inputs.size() == 1)
{
migraphx::check_shapes{inputs, *this, false}.only_dims(1);
......@@ -72,16 +78,20 @@ struct allocate
{
migraphx::check_shapes{inputs, *this, false}.has(0);
}
return s;
return s.value();
}
else
{
if(not buf_type.has_value())
{
MIGRAPHX_THROW("ALLOCATE: shape and buf_type attributes both not set");
}
migraphx::check_shapes{inputs, *this, false}.has(1).only_dims(1);
const auto& out_dims = inputs.at(0);
std::size_t max_val = std::numeric_limits<std::size_t>::max();
std::vector<shape::dynamic_dimension> dyn_dims(out_dims.lens().at(0),
shape::dynamic_dimension{0, max_val});
return {buf_type, dyn_dims};
return {buf_type.value(), dyn_dims};
}
}
argument compute(const shape& output_shape, const std::vector<argument>& args) const
......@@ -94,7 +104,11 @@ struct allocate
{
std::vector<std::size_t> output_dims(output_shape.ndim());
args.at(0).visit([&](auto a) { output_dims.assign(a.begin(), a.end()); });
return argument{shape{buf_type, output_dims}};
if(s)
{
return argument{shape{s->type(), output_dims}};
}
return argument{shape{buf_type.value(), output_dims}};
}
}
};
......
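A minimal usage sketch of the two allocate modes documented above. The attribute spellings ("shape", "buf_type") and the to_value conversions are assumptions inferred from the member names, since the reflect body is truncated in this hunk:

```cpp
#include <migraphx/make_op.hpp>
#include <migraphx/program.hpp>

int main()
{
    migraphx::program p;
    auto* mm = p.get_main_module();

    // Static allocate: `s` set, no inputs; compute_shape() returns `s`.
    migraphx::shape fixed{migraphx::shape::float_type, {4, 8}};
    auto static_buf = mm->add_instruction(
        migraphx::make_op("allocate", {{"shape", migraphx::to_value(fixed)}})); // "shape" assumed

    // Dynamic allocate: one input carrying the output dims; `buf_type` set, so
    // the compile-time shape has dynamic_dimensions {0, max} of matching rank.
    auto dims = mm->add_parameter("output_dims",
                                  migraphx::shape{migraphx::shape::int64_type, {2}});
    auto dyn_buf = mm->add_instruction(
        migraphx::make_op(
            "allocate",
            {{"buf_type", migraphx::to_value(migraphx::shape::float_type)}}), // assumed
        dims);
    (void)static_buf;
    (void)dyn_buf;
}
```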
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -31,6 +31,7 @@
#include <migraphx/value.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <migraphx/dyn_output.hpp>
#include <migraphx/float_equal.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -38,12 +39,13 @@ namespace op {
struct argmax
{
int64_t axis = 0;
bool select_last_index = false;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.axis, "axis"));
return pack(f(self.axis, "axis"), f(self.select_last_index, "select_last_index"));
}
value attributes() const
......@@ -87,6 +89,10 @@ struct argmax
max_val = cur_val;
max_index = i;
}
else if(select_last_index and float_equal(max_val, cur_val))
{
max_index = i;
}
}
return max_index;
}
......
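Since select_last_index only changes tie-breaking, a standalone sketch (using plain == where the operator uses float_equal) makes the behavior concrete:

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Mirrors the argmax loop above: with select_last_index set, the scan keeps
// updating max_index on exact ties instead of keeping the first hit.
std::int64_t argmax(const std::vector<float>& v, bool select_last_index)
{
    float max_val          = v.front();
    std::int64_t max_index = 0;
    for(std::int64_t i = 1; i < static_cast<std::int64_t>(v.size()); ++i)
    {
        if(v[i] > max_val)
        {
            max_val   = v[i];
            max_index = i;
        }
        else if(select_last_index and v[i] == max_val)
        {
            max_index = i;
        }
    }
    return max_index;
}

int main()
{
    std::vector<float> v{1.f, 3.f, 3.f, 2.f};
    std::cout << argmax(v, false) << '\n'; // 1 (first maximum)
    std::cout << argmax(v, true) << '\n';  // 2 (last maximum)
}
```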
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -30,6 +30,7 @@
#include <migraphx/config.hpp>
#include <migraphx/value.hpp>
#include <migraphx/op/normalize_attribute.hpp>
#include <migraphx/float_equal.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
......@@ -38,11 +39,12 @@ namespace op {
struct argmin
{
int64_t axis = 0;
bool select_last_index = false;
template <class Self, class F>
static auto reflect(Self& self, F f)
{
return pack(f(self.axis, "axis"));
return pack(f(self.axis, "axis"), f(self.select_last_index, "select_last_index"));
}
value attributes() const
......@@ -78,6 +80,10 @@ struct argmin
min_val = cur_val;
min_index = i;
}
else if(select_last_index and float_equal(min_val, cur_val))
{
min_index = i;
}
}
return min_index;
......
......@@ -30,6 +30,7 @@
#include <migraphx/rank.hpp>
#include <migraphx/requires.hpp>
#include <migraphx/config.hpp>
#include <migraphx/optional.hpp>
#include <vector>
namespace migraphx {
......@@ -68,6 +69,19 @@ auto stream_write_value_impl(rank<1>, std::ostream& os, const T& x) -> decltype(
os << x;
}
template <class T>
auto stream_write_value_impl(rank<1>, std::ostream& os, const optional<T>& x)
{
if(x.has_value())
{
os << *x;
}
else
{
os << "nullopt";
}
}
template <class T>
void stream_write_value_impl(rank<1>, std::ostream& os, const std::vector<T>& r)
{
......
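The behavior of the new optional<T> overload, restated as a self-contained program (simplified signature without the rank dispatch tag):

```cpp
#include <iostream>
#include <optional>
#include <ostream>

// Same behavior as the overload above: print the contained value,
// or the placeholder string "nullopt" when the optional is empty.
template <class T>
void stream_write_value(std::ostream& os, const std::optional<T>& x)
{
    if(x.has_value())
        os << *x;
    else
        os << "nullopt";
}

int main()
{
    stream_write_value(std::cout, std::optional<int>{42}); // prints 42
    std::cout << '\n';
    stream_write_value(std::cout, std::optional<int>{});   // prints nullopt
    std::cout << '\n';
}
```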
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -50,14 +50,25 @@ struct parse_arg_op : op_parser<parse_arg_op>
keep_dims = parser.parse_value(info.attributes.at("keepdims")).at<int>();
}
bool select_last_index = false;
if(contains(info.attributes, "select_last_index"))
{
select_last_index = static_cast<bool>(
parser.parse_value(info.attributes.at("select_last_index")).at<int>());
}
if(keep_dims == 0)
{
auto ins = info.add_instruction(make_op(opd.op_name, {{"axis", axis}}), args);
auto ins = info.add_instruction(
make_op(opd.op_name, {{"axis", axis}, {"select_last_index", select_last_index}}),
args);
return info.add_instruction(make_op("squeeze", {{"axes", {axis}}}), ins);
}
else
{
return info.add_instruction(make_op(opd.op_name, {{"axis", axis}}), args);
return info.add_instruction(
make_op(opd.op_name, {{"axis", axis}, {"select_last_index", select_last_index}}),
args);
}
}
};
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -21,43 +21,66 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#define MIGRAPHX_GUARD_RTGLIB_INT8_GEMM_PACK_HPP
#include <migraphx/argument.hpp>
#include <migraphx/config.hpp>
#include <utility>
#include <migraphx/onnx/op_parser.hpp>
#include <migraphx/ranges.hpp>
#include <migraphx/instruction.hpp>
#include <migraphx/make_op.hpp>
#include <migraphx/onnx/checks.hpp>
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
namespace gpu {
struct context;
namespace onnx {
struct hip_int8_gemm_pack_a
struct parse_mean_variance_normalization : op_parser<parse_mean_variance_normalization>
{
std::string name() const { return "gpu::int8_gemm_pack_a"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
{
return shapes.size() - 1;
}
};
std::vector<op_desc> operators() const { return {{"MeanVarianceNormalization"}}; }
struct hip_int8_gemm_pack_b
{
std::string name() const { return "gpu::int8_gemm_pack_b"; }
shape compute_shape(const std::vector<shape>& inputs) const;
argument compute(context& ctx, const shape&, const std::vector<argument>& args) const;
std::ptrdiff_t output_alias(const std::vector<shape>& shapes) const
instruction_ref parse(const op_desc& /*opd*/,
const onnx_parser& /*parser*/,
onnx_parser::node_info info,
std::vector<instruction_ref> args) const
{
return shapes.size() - 1;
auto&& data = args.front();
auto data_rank = data->get_shape().ndim();
std::vector<int64_t> axes{0, 2, 3};
if(contains(info.attributes, "axes"))
{
const auto& axes_attr = info.attributes["axes"].ints();
axes.assign(axes_attr.begin(), axes_attr.end());
}
else if(data_rank != 4)
{
MIGRAPHX_THROW(
"Input tensor needs to be rank 4 when axes is not specified. Instead it is rank " +
std::to_string(data_rank));
}
if(axes.size() != data_rank - 1)
{
MIGRAPHX_THROW("Length of axes array needs to be equal to input tensor rank - 1");
}
auto data_mean = info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), data);
auto data_mean_squared = info.add_common_op("mul", data_mean, data_mean);
auto data_squared = info.add_common_op("mul", data, data);
auto data_squared_mean =
info.add_instruction(make_op("reduce_mean", {{"axes", axes}}), data_squared);
auto mean_sub = info.add_common_op("sub", data_squared_mean, data_mean_squared);
auto std = info.add_common_op("sqrt", mean_sub);
auto dividend = info.add_common_op("sub", data, data_mean);
auto epsilon =
info.add_literal({data->get_shape().type(),
{data->get_shape().type() == shape::half_type ? 1e-7 : 1e-9}});
auto divisor = info.add_common_op("add", std, epsilon);
return info.add_common_op("div", dividend, divisor);
}
};
} // namespace gpu
} // namespace onnx
} // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx
#endif
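In formula form, the instruction sequence above computes, over the chosen axes (default {0, 2, 3}):

```latex
y = \frac{x - \mathrm{E}[x]}
         {\sqrt{\mathrm{E}[x^{2}] - \mathrm{E}[x]^{2}} + \epsilon},
\qquad
\epsilon =
\begin{cases}
10^{-7} & \text{half-precision input} \\
10^{-9} & \text{otherwise}
\end{cases}
```

which matches the ONNX MeanVarianceNormalization definition, with the small epsilon added to the denominator to guard against division by zero.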
......@@ -137,7 +137,7 @@ struct parse_slice : op_parser<parse_slice>
sd.always_insert(args.at(0));
// If axes arg is not given, the default is all of them.
if(sd.op.axes.empty() and sd.op_args.size() < 3)
if(sd.op.axes.empty() and sd.op_args.size() <= 3)
{
std::vector<int64_t> axes(args[0]->get_shape().ndim());
std::iota(axes.begin(), axes.end(), int64_t{0});
......
......@@ -56,9 +56,6 @@ struct parse_trilu : op_parser<parse_trilu>
k = arg_k.at<int>();
}
if(k < 0)
MIGRAPHX_THROW("PARSE_TRILU: negative k values not supported");
if(contains(info.attributes, "upper"))
{
upper = static_cast<bool>(info.attributes.at("upper").i());
......@@ -69,9 +66,12 @@ struct parse_trilu : op_parser<parse_trilu>
// when creating the mask, if upper == 1,
// the inner triangle will have values set to 0
std::vector<bool> mask_mat(num_rows * num_cols, upper);
// if upper == 0, kth diagonal must also be masked
if(not upper)
k++;
for(size_t i = 0; i < num_rows; i++)
{
for(size_t j = 0; j < std::min(k, static_cast<int>(num_cols)); j++)
for(int j = 0; j < std::min(k, static_cast<int>(num_cols)); j++)
{
mask_mat[i * num_cols + j] = not upper;
}
......
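With the negative-k restriction removed, the inner loop index must be a signed int (hence the size_t to int change above). For reference, a standalone sketch of the intended ONNX Trilu element-wise rule — this illustrates the semantics, not the parser internals:

```cpp
#include <iostream>
#include <vector>

// ONNX Trilu semantics: keep element (i, j) when it lies on the retained
// side of the k-th diagonal; k may now be negative.
std::vector<bool> trilu_mask(int rows, int cols, int k, bool upper)
{
    std::vector<bool> keep(rows * cols);
    for(int i = 0; i < rows; i++)
        for(int j = 0; j < cols; j++)
            keep[i * cols + j] = upper ? (j >= i + k) : (j <= i + k);
    return keep;
}

int main()
{
    // 3x3, upper=false, k=-1: keep only strictly-below-diagonal elements.
    auto m = trilu_mask(3, 3, -1, false);
    for(int i = 0; i < 3; i++)
    {
        for(int j = 0; j < 3; j++)
            std::cout << m[i * 3 + j];
        std::cout << '\n'; // prints 000 / 100 / 110
    }
}
```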
......@@ -936,7 +936,7 @@ void program::perf_report(std::ostream& os,
os << std::endl;
os << "Batch size: " << batch << std::endl;
os << "Rate: " << rate * batch << "/sec" << std::endl;
os << "Rate: " << rate * batch << " inferences/sec" << std::endl;
os << "Total time: " << total_time << "ms" << std::endl;
os << "Total instructions time: " << total_instruction_time << "ms" << std::endl;
os << "Overhead time: " << overhead_time << "ms"
......
......@@ -147,8 +147,8 @@ void quantize_int8(program& prog,
run_passes(prog,
{quantize_int8_pass{ins_names, *int8_quant_params},
optimize_module{},
simplify_qdq{},
optimize_module{},
dead_code_elimination{}});
}
......
......@@ -33,6 +33,8 @@
namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS {
MIGRAPHX_DECLARE_ENV_VAR(MIGRAPHX_ENABLE_CK_WORKAROUNDS);
void apply_quantizelinear(module& m, instruction_ref ins)
{
assert(ins->name() == "quantizelinear");
......@@ -62,9 +64,22 @@ void apply_quantizelinear(module& m, instruction_ref ins)
max_quant = qt.max();
min_quant = qt.min();
});
auto s = add_zero_point->get_shape();
auto min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}});
auto max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}});
auto s = add_zero_point->get_shape();
instruction_ref min_arg;
instruction_ref max_arg;
if(enabled(MIGRAPHX_ENABLE_CK_WORKAROUNDS{}))
{
std::vector<int> min_data(s.elements(), min_quant);
std::vector<int> max_data(s.elements(), max_quant);
min_arg = m.add_literal(literal(s, min_data));
max_arg = m.add_literal(literal(s, max_data));
}
else
{
min_arg = m.add_literal(literal{shape{s.type()}, {min_quant}});
max_arg = m.add_literal(literal{shape{s.type()}, {max_quant}});
}
auto saturate = insert_common_op(m, ins, make_op("clip"), {add_zero_point, min_arg, max_arg});
m.replace_instruction(
ins, make_op("convert", {{"target_type", ins->get_shape().type()}}), saturate);
......
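For context, the clip being built here is the saturation step of ONNX QuantizeLinear; the CK workaround only swaps scalar clamp literals for full-shape ones, leaving the math unchanged:

```latex
y = \operatorname{clip}\!\left(\operatorname{round}\!\left(\tfrac{x}{scale}\right) + zero\_point,\; q_{\min},\; q_{\max}\right)
```

where q_min and q_max are the representable range of the target integer type (taken from qt.min() and qt.max() above).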
......@@ -37,8 +37,7 @@ if(NOT TARGET MIOpen)
message(SEND_ERROR "Can't find MIOpen")
endif()
if(NOT WIN32)
# TODO: re-enable when CK is ported to Windows
if(MIGRAPHX_USE_COMPOSABLEKERNEL)
find_package(composable_kernel 1.0.0 REQUIRED COMPONENTS jit_library)
endif()
......@@ -52,10 +51,10 @@ file(GLOB KERNEL_FILES CONFIGURE_DEPENDS
${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/*.hpp)
message(STATUS "KERNEL_FILES: ${KERNEL_FILES}")
if(WIN32)
# TODO: re-enable when CK is ported to Windows
if(NOT MIGRAPHX_USE_COMPOSABLEKERNEL)
list(REMOVE_ITEM KERNEL_FILES
${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/ck_gemm.hpp
${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/ck_gemm_softmax_gemm.hpp
${CMAKE_CURRENT_SOURCE_DIR}/kernels/include/migraphx/kernels/ck.hpp)
endif()
......@@ -103,9 +102,10 @@ rocm_clang_tidy_check(kernel_file_check)
file(GLOB JIT_GPU_SRCS CONFIGURE_DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/jit/*.cpp)
if(WIN32)
# TODO: re-enable when CK is ported to Windows
list(REMOVE_ITEM JIT_GPU_SRCS ${CMAKE_CURRENT_SOURCE_DIR}/jit/ck_gemm.cpp)
if(NOT MIGRAPHX_USE_COMPOSABLEKERNEL)
list(REMOVE_ITEM JIT_GPU_SRCS
${CMAKE_CURRENT_SOURCE_DIR}/jit/ck_gemm.cpp
${CMAKE_CURRENT_SOURCE_DIR}/jit/ck_gemm_softmax_gemm.cpp)
endif()
add_library(migraphx_gpu
......@@ -128,8 +128,6 @@ add_library(migraphx_gpu
gather.cpp
gemm_impl.cpp
hip.cpp
int8_conv_pack.cpp
int8_gemm_pack.cpp
kernel.cpp
lowering.cpp
logsoftmax.cpp
......@@ -140,7 +138,6 @@ add_library(migraphx_gpu
no_device.cpp
nonzero.cpp
pack_args.cpp
pack_int8_args.cpp
prefuse_ops.cpp
pad.cpp
perfdb.cpp
......@@ -184,7 +181,6 @@ register_migraphx_gpu_ops(hip_
register_migraphx_gpu_ops(miopen_
abs
contiguous
int8_conv_pack
lrn
pooling
)
......@@ -192,10 +188,6 @@ register_op(migraphx_gpu
HEADER migraphx/gpu/rnn_variable_seq_lens.hpp
OPERATORS gpu::hip_rnn_var_sl_shift_sequence gpu::hip_rnn_var_sl_shift_output gpu::hip_rnn_var_sl_last_output
INCLUDES migraphx/gpu/context.hpp)
register_op(migraphx_gpu
HEADER migraphx/gpu/int8_gemm_pack.hpp
OPERATORS gpu::hip_int8_gemm_pack_a gpu::hip_int8_gemm_pack_b
INCLUDES migraphx/gpu/context.hpp)
register_op(migraphx_gpu
HEADER migraphx/gpu/gemm.hpp
OPERATORS gpu::rocblas_gemm<op::dot> gpu::rocblas_gemm<op::quant_dot>
......@@ -292,8 +284,7 @@ endif()
target_link_libraries(migraphx_gpu PUBLIC migraphx MIOpen roc::rocblas)
target_link_libraries(migraphx_gpu PRIVATE migraphx_device migraphx_kernels)
if(NOT WIN32)
# TODO: re-enable when CK is ported to Windows
if(MIGRAPHX_USE_COMPOSABLEKERNEL)
target_link_libraries(migraphx_gpu PRIVATE composable_kernel::jit_library)
endif()
......
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
* Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
......@@ -40,7 +40,8 @@ argument hip_argmax::compute(context& ctx, const shape&, const std::vector<argum
{
auto n_dim = args.front().get_shape().lens().size();
int64_t tuned_axis = tune_axis(n_dim, op.axis, op.name());
device::argmax(ctx.get_stream().get(), args.back(), args.front(), tuned_axis);
device::argmax(
ctx.get_stream().get(), args.back(), args.front(), tuned_axis, op.select_last_index);
return args.back();
}
......