Commit cb10ae76 authored by Khalique Ahmed's avatar Khalique Ahmed
Browse files

Merge branch 'develop' of https://github.com/ROCmSoftwarePlatform/AMDMIGraphX into nhwc_workaround

parents 498e6c9d 75e6618c
...@@ -43,6 +43,8 @@ else() ...@@ -43,6 +43,8 @@ else()
endif() endif()
endif() endif()
set(CMAKE_INSTALL_PREFIX "/opt/rocm" CACHE PATH "")
set(CMAKE_BUILD_RPATH "${CMAKE_BINARY_DIR}/lib") set(CMAKE_BUILD_RPATH "${CMAKE_BINARY_DIR}/lib")
project(migraphx LANGUAGES C CXX) project(migraphx LANGUAGES C CXX)
...@@ -114,6 +116,7 @@ rocm_enable_clang_tidy( ...@@ -114,6 +116,7 @@ rocm_enable_clang_tidy(
llvm-namespace-comment llvm-namespace-comment
misc-* misc-*
-misc-confusable-identifiers -misc-confusable-identifiers
-misc-use-anonymous-namespace
modernize-* modernize-*
performance-* performance-*
readability-* readability-*
......
...@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y gnupg2 --no-install-recommends curl && ...@@ -10,7 +10,7 @@ RUN apt-get update && apt-get install -y gnupg2 --no-install-recommends curl &&
curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - curl -sL http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add -
# Add rocm repository # Add rocm repository
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.5/ focal main > /etc/apt/sources.list.d/rocm.list' RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.6/ focal main > /etc/apt/sources.list.d/rocm.list'
# From docs.amd.com for installing rocm. Needed to install properly # From docs.amd.com for installing rocm. Needed to install properly
RUN sh -c "echo 'Package: *\nPin: release o=repo.radeon.com\nPin-priority: 600' > /etc/apt/preferences.d/rocm-pin-600" RUN sh -c "echo 'Package: *\nPin: release o=repo.radeon.com\nPin-priority: 600' > /etc/apt/preferences.d/rocm-pin-600"
...@@ -113,7 +113,8 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR ...@@ -113,7 +113,8 @@ RUN git clone --single-branch --branch ${ONNXRUNTIME_BRANCH} --recursive ${ONNXR
ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh ADD tools/build_and_test_onnxrt.sh /onnxruntime/build_and_test_onnxrt.sh
RUN cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@8d25af3b3721c159bb41cc6388e9453b1018c126 -DBUILD_MIXR_TARGET=On -DLLVM_ENABLE_ZSTD=Off -DLLVM_ENABLE_THREADS=Off # Use the /opt/cmake install because LLVM/MLIR need cmake >= 3.20
RUN env PATH=/opt/cmake/bin:$PATH cget -p /usr/local install ROCmSoftwarePlatform/rocMLIR@1ad9d6df32acc6d29d58e8ed6710e36746d0a4d6 -DBUILD_FAT_LIBROCKCOMPILER=On
ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db ENV MIOPEN_FIND_DB_PATH=/tmp/miopen/find-db
ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db ENV MIOPEN_USER_DB_PATH=/tmp/miopen/user-db
......
...@@ -91,7 +91,7 @@ def rocmnodename(name) { ...@@ -91,7 +91,7 @@ def rocmnodename(name) {
node_name = "${rocmtest_name} && navi21"; node_name = "${rocmtest_name} && navi21";
} else if(name == "mi100+") { } else if(name == "mi100+") {
node_name = "${rocmtest_name} && (gfx908 || gfx90a)"; node_name = "${rocmtest_name} && (gfx908 || gfx90a)";
} else if(name == "anygpu") { } else if(name == "cdna") {
node_name = "${rocmtest_name} && (gfx908 || gfx90a || vega)"; node_name = "${rocmtest_name} && (gfx908 || gfx90a || vega)";
} else if(name == "nogpu") { } else if(name == "nogpu") {
node_name = "${rocmtest_name} && nogpu"; node_name = "${rocmtest_name} && nogpu";
...@@ -105,35 +105,29 @@ def rocmnode(name, body) { ...@@ -105,35 +105,29 @@ def rocmnode(name, body) {
} }
} }
rocmtest clang_debug: rocmnode('vega') { cmake_build -> rocmtest clang_debug: rocmnode('cdna') { cmake_build ->
stage('Hip Clang Debug') { stage('hipRTC Debug') {
def sanitizers = "undefined" def sanitizers = "undefined"
def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}" def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}"
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'") cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}' -DMIGRAPHX_USE_HIPRTC=On", gpu_debug: true, hiprtc_workarounds: true)
} }
}, clang_gpu_debug: rocmnode('vega') { cmake_build -> }, clang_release: rocmnode('cdna') { cmake_build ->
stage('Hip Clang GPU Debug') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release", gpu_debug: true)
}
}, clang_release: rocmnode('vega') { cmake_build ->
stage('Hip Clang Release') { stage('Hip Clang Release') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release") cmake_build(flags: "-DCMAKE_BUILD_TYPE=release")
stash includes: 'build/*.deb', name: 'migraphx-package' stash includes: 'build/*.deb', name: 'migraphx-package'
} }
}, hiprtc_gpu_debug: rocmnode('vega') { cmake_build -> }, all_targets_debug : rocmnode('cdna') { cmake_build ->
stage('HipRTC GPU Debug') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_USE_HIPRTC=On", gpu_debug: true, hiprtc_workarounds: true)
}
}, all_targets_debug : rocmnode('vega') { cmake_build ->
stage('All targets Release') { stage('All targets Release') {
cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DMIGRAPHX_ENABLE_FPGA=On") cmake_build(flags: "-DCMAKE_BUILD_TYPE=release -DMIGRAPHX_ENABLE_GPU=On -DMIGRAPHX_ENABLE_CPU=On -DMIGRAPHX_ENABLE_FPGA=On")
} }
}, mlir_debug: rocmnode('vega') { cmake_build -> }, mlir_debug: rocmnode('cdna') { cmake_build ->
stage('MLIR Debug') { stage('MLIR Debug') {
withEnv(['MIGRAPHX_ENABLE_MLIR=1']) { withEnv(['MIGRAPHX_ENABLE_MLIR=1']) {
def sanitizers = "undefined" def sanitizers = "undefined"
def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize-recover=${sanitizers}" // Note: the -fno-sanitize= is copied from upstream LLVM_UBSAN_FLAGS.
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'") def debug_flags_cxx = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr,function -fno-sanitize-recover=${sanitizers}"
def debug_flags = "-g -O2 -fsanitize=${sanitizers} -fno-sanitize=vptr -fno-sanitize-recover=${sanitizers}"
cmake_build(flags: "-DCMAKE_BUILD_TYPE=debug -DMIGRAPHX_ENABLE_PYTHON=Off -DMIGRAPHX_ENABLE_MLIR=On -DCMAKE_CXX_FLAGS_DEBUG='${debug_flags_cxx}' -DCMAKE_C_FLAGS_DEBUG='${debug_flags}'")
} }
} }
}, ck_release: rocmnode('mi100+') { cmake_build -> }, ck_release: rocmnode('mi100+') { cmake_build ->
...@@ -163,7 +157,7 @@ def onnxnode(name, body) { ...@@ -163,7 +157,7 @@ def onnxnode(name, body) {
} }
} }
rocmtest onnx: onnxnode('anygpu') { cmake_build -> rocmtest onnx: onnxnode('cdna') { cmake_build ->
stage("Onnx runtime") { stage("Onnx runtime") {
sh ''' sh '''
apt install half apt install half
......
...@@ -131,7 +131,7 @@ In this case, we can create `argument <migraphx::argument>` objects directly fro ...@@ -131,7 +131,7 @@ In this case, we can create `argument <migraphx::argument>` objects directly fro
std::vector<float> results_vector(64); std::vector<float> results_vector(64);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(results_vector, sol)); EXPECT(migraphx::verify::verify_range(results_vector, sol));
An `argument <migraphx::argument>` can handle memory buffers from either the GPU or the CPU. An `argument <migraphx::argument>` can handle memory buffers from either the GPU or the CPU.
By default when running the `program <migraphx::program>`, buffers are allocated on the corresponding target. By default when running the `program <migraphx::program>`, buffers are allocated on the corresponding target.
......
...@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local ...@@ -6,7 +6,7 @@ ARG PREFIX=/usr/local
RUN dpkg --add-architecture i386 RUN dpkg --add-architecture i386
# Add rocm repository # Add rocm repository
RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.5/ focal main > /etc/apt/sources.list.d/rocm.list' RUN sh -c 'echo deb [arch=amd64 trusted=yes] http://repo.radeon.com/rocm/apt/5.6/ focal main > /etc/apt/sources.list.d/rocm.list'
# Install dependencies # Install dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
......
...@@ -1487,13 +1487,17 @@ quantize_int8(const program& prog, const target& ptarget, const quantize_int8_op ...@@ -1487,13 +1487,17 @@ quantize_int8(const program& prog, const target& ptarget, const quantize_int8_op
struct experimental_custom_op_base struct experimental_custom_op_base
{ {
experimental_custom_op_base() = default;
experimental_custom_op_base(const experimental_custom_op_base&) = default;
experimental_custom_op_base& operator=(const experimental_custom_op_base&) = default;
virtual ~experimental_custom_op_base() = default;
virtual std::string name() const = 0; virtual std::string name() const = 0;
virtual argument compute(context ctx, shape output, arguments inputs) const = 0; virtual argument compute(context ctx, shape output, arguments inputs) const = 0;
virtual shape compute_shape(shapes inputs) const = 0; virtual shape compute_shape(shapes inputs) const = 0;
virtual std::vector<size_t> output_alias(shapes) const { return {}; } virtual std::vector<size_t> output_alias(shapes) const { return {}; }
// TODO: Return target string instead of bool // TODO: Return target string instead of bool
virtual bool runs_on_offload_target() const = 0; virtual bool runs_on_offload_target() const = 0;
virtual ~experimental_custom_op_base() = default;
}; };
struct experimental_custom_op : interface_base<MIGRAPHX_HANDLE_BASE(experimental_custom_op)> struct experimental_custom_op : interface_base<MIGRAPHX_HANDLE_BASE(experimental_custom_op)>
......
...@@ -43,7 +43,7 @@ template <class T, class... Ts> ...@@ -43,7 +43,7 @@ template <class T, class... Ts>
using dependent_type = typename select_dependent_type<T, Ts...>::type; using dependent_type = typename select_dependent_type<T, Ts...>::type;
MIGRAPHX_EXPORT MIGRAPHX_EXPORT
bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens); bool normalize_attributes(operation& op, const shape& input_shape);
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
......
...@@ -143,7 +143,7 @@ auto compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs) ...@@ -143,7 +143,7 @@ auto compute_shape_op(rank<2>, const T& x, const std::vector<shape>& inputs)
if(inputs.empty()) if(inputs.empty())
MIGRAPHX_THROW("At least one input is required for " + x.name()); MIGRAPHX_THROW("At least one input is required for " + x.name());
dependent_type<operation, T> y = x; dependent_type<operation, T> y = x;
normalize_attributes(y, inputs[0].max_lens()); normalize_attributes(y, inputs[0]);
return any_cast<T>(y).normalize_compute_shape(inputs); return any_cast<T>(y).normalize_compute_shape(inputs);
} }
......
...@@ -35,6 +35,7 @@ ...@@ -35,6 +35,7 @@
namespace migraphx { namespace migraphx {
inline namespace MIGRAPHX_INLINE_NS { inline namespace MIGRAPHX_INLINE_NS {
namespace verify {
// Compute the value of a range // Compute the value of a range
template <class R> template <class R>
...@@ -196,6 +197,7 @@ bool verify_range(const R1& r1, const R2& r2, double tolerance = 80, double* out ...@@ -196,6 +197,7 @@ bool verify_range(const R1& r1, const R2& r2, double tolerance = 80, double* out
return error <= threshold; return error <= threshold;
} }
} // namespace verify
} // namespace MIGRAPHX_INLINE_NS } // namespace MIGRAPHX_INLINE_NS
} // namespace migraphx } // namespace migraphx
#endif #endif
...@@ -467,7 +467,7 @@ operation instruction::normalized_operator() const ...@@ -467,7 +467,7 @@ operation instruction::normalized_operator() const
if(this->need_normalization()) if(this->need_normalization())
{ {
auto s = this->inputs().front()->get_shape(); auto s = this->inputs().front()->get_shape();
if(not normalize_attributes(o, s.max_lens())) if(not normalize_attributes(o, s))
return this->get_operator(); return this->get_operator();
} }
return o; return o;
......
/* /*
* The MIT License (MIT) * The MIT License (MIT)
* *
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved. * Copyright (c) 2015-2023 Advanced Micro Devices, Inc. All rights reserved.
* *
* Permission is hereby granted, free of charge, to any person obtaining a copy * Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal * of this software and associated documentation files (the "Software"), to deal
...@@ -35,8 +35,9 @@ inline namespace MIGRAPHX_INLINE_NS { ...@@ -35,8 +35,9 @@ inline namespace MIGRAPHX_INLINE_NS {
* vec: the vector attribute to normalize * vec: the vector attribute to normalize
* axes: the operator's axes attribute if it exists, empty otherwise * axes: the operator's axes attribute if it exists, empty otherwise
* val: the normalize_axes key and options. Ex: normalize["axes"] = * val: the normalize_axes key and options. Ex: normalize["axes"] =
* value::array{normalize_attribute::include_min}; lens: shape dimensions passed when calling * value::array{normalize_attribute::include_min};
* normalize_attributes(op&, lens) * input_shape: input shape passed when calling
* normalize_attributes(op&, input_shape)
* *
* See normalize_attribute.hpp for explaining the options. * See normalize_attribute.hpp for explaining the options.
*/ */
...@@ -44,11 +45,11 @@ template <class Message> ...@@ -44,11 +45,11 @@ template <class Message>
auto tune_attribute(const std::vector<int64_t>& vec, auto tune_attribute(const std::vector<int64_t>& vec,
const std::vector<int64_t>& axes, const std::vector<int64_t>& axes,
const value& val, const value& val,
const std::vector<std::size_t>& lens, const shape& input_shape,
Message m) Message m)
{ {
std::vector<int64_t> result(vec); std::vector<int64_t> result(vec);
int64_t n_rank = lens.size(); int64_t n_rank = input_shape.ndim();
std::vector<op::normalize_attribute> vec_attrs = val.to_vector<op::normalize_attribute>(); std::vector<op::normalize_attribute> vec_attrs = val.to_vector<op::normalize_attribute>();
if(contains(vec_attrs, op::normalize_attribute::use_output)) if(contains(vec_attrs, op::normalize_attribute::use_output))
{ {
...@@ -56,9 +57,28 @@ auto tune_attribute(const std::vector<int64_t>& vec, ...@@ -56,9 +57,28 @@ auto tune_attribute(const std::vector<int64_t>& vec,
} }
std::vector<int64_t> max_vals(vec.size(), n_rank); std::vector<int64_t> max_vals(vec.size(), n_rank);
if(contains(vec_attrs, op::normalize_attribute::use_len)) if(contains(vec_attrs, op::normalize_attribute::use_len))
{ {
std::transform(axes.begin(), axes.end(), max_vals.begin(), [&](auto i) { return lens[i]; }); if(input_shape.dynamic())
{
std::transform(axes.begin(), axes.end(), max_vals.begin(), [&](auto i) {
const auto& dd = input_shape.dyn_dims().at(i);
if(not dd.is_fixed())
{
MIGRAPHX_THROW(
"NORMALIZE_ATTR: 'use_lens' on a non-fixed dynamic dimension, axis=" +
std::to_string(i));
}
return dd.max;
});
}
else
{
std::transform(axes.begin(), axes.end(), max_vals.begin(), [&](auto i) {
return input_shape.lens().at(i);
});
}
} }
if(contains(vec_attrs, op::normalize_attribute::clip_max)) if(contains(vec_attrs, op::normalize_attribute::clip_max))
...@@ -159,9 +179,9 @@ auto tune_pad_attribute(const value& val) ...@@ -159,9 +179,9 @@ auto tune_pad_attribute(const value& val)
/** /**
* Assumptions: * Assumptions:
* Dimensions to pad start from the third dimension (index 2). * Dimensions to pad start from the third dimension (index 2).
* Called by compute_shape_op() with the `lens` of the first input. * Called by compute_shape_op() with the shape of the first input.
*/ */
bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens) bool normalize_attributes(operation& op, const shape& input_shape)
{ {
bool tuned = false; bool tuned = false;
auto attrs = op.attributes(); auto attrs = op.attributes();
...@@ -172,9 +192,9 @@ bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens) ...@@ -172,9 +192,9 @@ bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens)
auto padding_size = padding.size(); auto padding_size = padding.size();
auto padding_start = 2; auto padding_start = 2;
if(padding_size == 2 * (lens.size() - padding_start)) if(padding_size == 2 * (input_shape.ndim() - padding_start))
tuned = true; tuned = true;
else if(padding_size != (lens.size() - padding_start)) else if(padding_size != (input_shape.ndim() - padding_start))
MIGRAPHX_THROW("inconsistent padding size"); MIGRAPHX_THROW("inconsistent padding size");
else else
{ {
...@@ -205,7 +225,7 @@ bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens) ...@@ -205,7 +225,7 @@ bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens)
axes = val.at("axes").without_key().to_vector<int64_t>(); axes = val.at("axes").without_key().to_vector<int64_t>();
} }
auto vec = vv.to_vector<int64_t>(); auto vec = vv.to_vector<int64_t>();
auto result = tune_attribute(vec, axes, rv.without_key(), lens, message); auto result = tune_attribute(vec, axes, rv.without_key(), input_shape, message);
val[key] = result; val[key] = result;
op.from_value(val); op.from_value(val);
val = op.to_value(); val = op.to_value();
...@@ -214,7 +234,7 @@ bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens) ...@@ -214,7 +234,7 @@ bool normalize_attributes(operation& op, const std::vector<std::size_t>& lens)
else else
{ {
auto num = vv.to<int64_t>(); auto num = vv.to<int64_t>();
auto result = tune_attribute({num}, {num}, rv.without_key(), lens, message); auto result = tune_attribute({num}, {num}, rv.without_key(), input_shape, message);
val[key] = result.front(); val[key] = result.front();
op.from_value(val); op.from_value(val);
val = op.to_value(); val = op.to_value();
......
...@@ -45,7 +45,7 @@ void normalize_ops::apply(module& m) const ...@@ -45,7 +45,7 @@ void normalize_ops::apply(module& m) const
auto s = inputs[0]->get_shape(); auto s = inputs[0]->get_shape();
migraphx::operation tuned_op = ins->get_operator(); migraphx::operation tuned_op = ins->get_operator();
if(normalize_attributes(tuned_op, s.max_lens())) if(normalize_attributes(tuned_op, s))
{ {
m.replace_instruction(ins, tuned_op, inputs); m.replace_instruction(ins, tuned_op, inputs);
ins->set_normalized(); ins->set_normalized();
......
...@@ -188,7 +188,9 @@ if(MIGRAPHX_ENABLE_MLIR) ...@@ -188,7 +188,9 @@ if(MIGRAPHX_ENABLE_MLIR)
find_package(rocMLIR 1.0.0 CONFIG REQUIRED) find_package(rocMLIR 1.0.0 CONFIG REQUIRED)
message(STATUS "Build with rocMLIR::rockCompiler ${rocMLIR_VERSION}") message(STATUS "Build with rocMLIR::rockCompiler ${rocMLIR_VERSION}")
target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR") target_compile_definitions(migraphx_gpu PRIVATE "-DMIGRAPHX_MLIR")
target_link_libraries(migraphx_gpu PUBLIC rocMLIR::rockCompiler) # Make this private to avoid multiple inclusions of LLVM symbols.
# TODO: Fix rocMLIR's library to hide LLVM internals.
target_link_libraries(migraphx_gpu PRIVATE rocMLIR::rockCompiler)
endif() endif()
if(MIGRAPHX_USE_HIPRTC) if(MIGRAPHX_USE_HIPRTC)
......
...@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous> ...@@ -41,8 +41,6 @@ struct miopen_contiguous : unary_device<miopen_contiguous, &device::contiguous>
shape compute_shape(const std::vector<shape>& inputs) const shape compute_shape(const std::vector<shape>& inputs) const
{ {
check_shapes{inputs, *this}.has(2); check_shapes{inputs, *this}.has(2);
if(inputs.front().standard())
return inputs.front();
auto lens = inputs.at(0).lens(); auto lens = inputs.at(0).lens();
auto t = inputs.at(0).type(); auto t = inputs.at(0).type();
return {t, lens}; return {t, lens};
......
...@@ -122,12 +122,14 @@ struct source_location_capture ...@@ -122,12 +122,14 @@ struct source_location_capture
{ {
T x; T x;
source_location loc; source_location loc;
template <class U, class = decltype(T(U{}))> // declval is a workaround since default constructor for "U" is not working with rocm-5.6
template <class U>
static U&& declval();
template <class U, class = decltype(T(declval<U>()))>
constexpr source_location_capture(U px, source_location ploc = source_location{}) constexpr source_location_capture(U px, source_location ploc = source_location{})
: x(px), loc(ploc) : x(px), loc(ploc)
{ {
} }
constexpr operator source_location() const { return loc; } constexpr operator source_location() const { return loc; }
constexpr operator T() const { return x; } constexpr operator T() const { return x; }
......
...@@ -389,14 +389,20 @@ struct mlir_program ...@@ -389,14 +389,20 @@ struct mlir_program
mlir_operation_state& add_attributes(const std::vector<named_attribute_t>& named_attrs) mlir_operation_state& add_attributes(const std::vector<named_attribute_t>& named_attrs)
{ {
auto attributes = prog->name_attributes(named_attrs); auto attributes = prog->name_attributes(named_attrs);
if(not attributes.empty())
{
mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data()); mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
}
return *this; return *this;
} }
mlir_operation_state& add_attribute_value(const value& v) mlir_operation_state& add_attribute_value(const value& v)
{ {
auto attributes = prog->name_attributes(v); auto attributes = prog->name_attributes(v);
if(not attributes.empty())
{
mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data()); mlirOperationStateAddAttributes(&op_state, attributes.size(), attributes.data());
}
return *this; return *this;
} }
...@@ -419,13 +425,19 @@ struct mlir_program ...@@ -419,13 +425,19 @@ struct mlir_program
return shape{r.type(), r.lens()}; return shape{r.type(), r.lens()};
}); });
auto x = prog->make_tensors(reshaped); auto x = prog->make_tensors(reshaped);
if(not x.empty())
{
mlirOperationStateAddResults(&op_state, x.size(), x.data()); mlirOperationStateAddResults(&op_state, x.size(), x.data());
}
return *this; return *this;
} }
mlir_operation_state& add_operands(const std::vector<MlirValue>& inputs) mlir_operation_state& add_operands(const std::vector<MlirValue>& inputs)
{
if(not inputs.empty())
{ {
mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data()); mlirOperationStateAddOperands(&op_state, inputs.size(), inputs.data());
}
return *this; return *this;
} }
...@@ -435,7 +447,10 @@ struct mlir_program ...@@ -435,7 +447,10 @@ struct mlir_program
std::transform(regions.begin(), regions.end(), mregions.begin(), [](const auto& r) { std::transform(regions.begin(), regions.end(), mregions.begin(), [](const auto& r) {
return r.get(); return r.get();
}); });
if(not mregions.empty())
{
mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data()); mlirOperationStateAddOwnedRegions(&op_state, mregions.size(), mregions.data());
}
mlir_operation op(mlirOperationCreate(&op_state)); mlir_operation op(mlirOperationCreate(&op_state));
// Release memory since mlir_operation owns it // Release memory since mlir_operation owns it
for(auto& r : regions) for(auto& r : regions)
...@@ -607,12 +622,12 @@ struct mlir_program ...@@ -607,12 +622,12 @@ struct mlir_program
mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())}; mlir_pass_manager pm_back{mlirPassManagerCreate(ctx.get())};
// 1st pipeline to call // 1st pipeline to call
mlirMIGraphXAddHighLevelPipeline(pm_front.get()); mlirMIGraphXAddHighLevelPipeline(pm_front.get());
mlirPassManagerRun(pm_front.get(), mmodule.get()); mlirPassManagerRunOnOp(pm_front.get(), mlirModuleGetOperation(mmodule.get()));
// 2nd pipeline to call // 2nd pipeline to call
get_module_tuned(); get_module_tuned();
mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str()); mlirMIGraphXAddBackendPipeline(pm_back.get(), target_arch.c_str());
mlirPassManagerRun(pm_back.get(), mmodule.get()); mlirPassManagerRunOnOp(pm_back.get(), mlirModuleGetOperation(mmodule.get()));
code_object_op op{}; code_object_op op{};
op.symbol_name = sym_name; op.symbol_name = sym_name;
...@@ -701,6 +716,11 @@ struct mlir_program ...@@ -701,6 +716,11 @@ struct mlir_program
bool get_module_tuned() const bool get_module_tuned() const
{ {
static mlir_tuning_table tuning_table = create_tuning_table(); static mlir_tuning_table tuning_table = create_tuning_table();
// The tuning table as currently implemented is currently not
// thread safe. This will be fixed in the future. For now,
// stick a mutex around all tuning table interaction.
static std::mutex lock;
std::lock_guard<std::mutex> guard(lock);
if(!mlirRockTuningSetFromTable(tuning_table.get(), mmodule.get())) if(!mlirRockTuningSetFromTable(tuning_table.get(), mmodule.get()))
{ {
const char* prob_config = mlirRockTuningGetKey(tuning_table.get(), mmodule.get()); const char* prob_config = mlirRockTuningGetKey(tuning_table.get(), mmodule.get());
...@@ -778,9 +798,6 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct ...@@ -778,9 +798,6 @@ code_object_op compile_mlir(const context&, module m, const std::vector<instruct
{ {
adjust_param_shapes(m, inputs); adjust_param_shapes(m, inputs);
const bool trace = enabled(MIGRAPHX_TRACE_MLIR{}); const bool trace = enabled(MIGRAPHX_TRACE_MLIR{});
// set mutex while llvm thread support is disabled.
static std::mutex g_mlirc_mutex; // NOLINT
const std::lock_guard<std::mutex> lock(g_mlirc_mutex);
if(trace) if(trace)
std::cout << m << std::endl; std::cout << m << std::endl;
......
...@@ -35,7 +35,7 @@ bool verify_args(const std::string& name, ...@@ -35,7 +35,7 @@ bool verify_args(const std::string& name,
bool passed = true; bool passed = true;
visit_all(ref_arg, target_arg)([&](auto ref, auto target) { visit_all(ref_arg, target_arg)([&](auto ref, auto target) {
double error; double error;
passed = verify_range(ref, target, tolerance, &error); passed = verify::verify_range(ref, target, tolerance, &error);
if(not passed) if(not passed)
{ {
// TODO: Check for nans // TODO: Check for nans
...@@ -45,27 +45,27 @@ bool verify_args(const std::string& name, ...@@ -45,27 +45,27 @@ bool verify_args(const std::string& name,
std::cout << "ref:" << ref << std::endl; std::cout << "ref:" << ref << std::endl;
if(target.size() < 32) if(target.size() < 32)
std::cout << "target:" << target << std::endl; std::cout << "target:" << target << std::endl;
if(range_zero(ref)) if(verify::range_zero(ref))
std::cout << "Ref data is all zeros" << std::endl; std::cout << "Ref data is all zeros" << std::endl;
if(range_zero(target)) if(verify::range_zero(target))
std::cout << "Target data is all zeros" << std::endl; std::cout << "Target data is all zeros" << std::endl;
auto mxdiff = max_diff(ref, target); auto mxdiff = verify::max_diff(ref, target);
std::cout << "Max diff: " << mxdiff << std::endl; std::cout << "Max diff: " << mxdiff << std::endl;
auto idx = mismatch_idx(ref, target, float_equal); auto idx = verify::mismatch_idx(ref, target, float_equal);
if(idx < range_distance(ref)) if(idx < verify::range_distance(ref))
{ {
std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx] std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
<< std::endl; << std::endl;
} }
auto ref_nan_idx = find_idx(ref, not_finite); auto ref_nan_idx = find_idx(ref, verify::not_finite);
if(ref_nan_idx >= 0) if(ref_nan_idx >= 0)
std::cout << "Non finite number found in ref at " << ref_nan_idx << ": " std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
<< ref[ref_nan_idx] << std::endl; << ref[ref_nan_idx] << std::endl;
auto target_nan_idx = find_idx(target, not_finite); auto target_nan_idx = find_idx(target, verify::not_finite);
if(target_nan_idx >= 0) if(target_nan_idx >= 0)
std::cout << "Non finite number found in target at " << target_nan_idx << ": " std::cout << "Non finite number found in target at " << target_nan_idx << ": "
<< target[target_nan_idx] << std::endl; << target[target_nan_idx] << std::endl;
...@@ -73,27 +73,27 @@ bool verify_args(const std::string& name, ...@@ -73,27 +73,27 @@ bool verify_args(const std::string& name,
} }
else else
{ {
if(range_zero(ref)) if(verify::range_zero(ref))
std::cout << "Ref data is all zeros" << std::endl; std::cout << "Ref data is all zeros" << std::endl;
if(range_zero(target)) if(verify::range_zero(target))
std::cout << "Target data is all zeros" << std::endl; std::cout << "Target data is all zeros" << std::endl;
// auto mxdiff = max_diff(ref, target); // auto mxdiff = max_diff(ref, target);
// std::cout << "Max diff: " << mxdiff << std::endl; // std::cout << "Max diff: " << mxdiff << std::endl;
// auto idx = mismatch_idx(ref, target, float_equal); // auto idx = mismatch_idx(ref, target, float_equal);
// if(idx < range_distance(ref)) // if(idx < verify::range_distance(ref))
// { // {
// std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx] // std::cout << "Mismatch at " << idx << ": " << ref[idx] << " != " << target[idx]
// << std::endl; // << std::endl;
// } // }
auto ref_nan_idx = find_idx(ref, not_finite); auto ref_nan_idx = find_idx(ref, verify::not_finite);
if(ref_nan_idx >= 0) if(ref_nan_idx >= 0)
std::cout << "Non finite number found in ref at " << ref_nan_idx << ": " std::cout << "Non finite number found in ref at " << ref_nan_idx << ": "
<< ref[ref_nan_idx] << std::endl; << ref[ref_nan_idx] << std::endl;
auto target_nan_idx = find_idx(target, not_finite); auto target_nan_idx = find_idx(target, verify::not_finite);
if(target_nan_idx >= 0) if(target_nan_idx >= 0)
std::cout << "Non finite number found in target at " << target_nan_idx << ": " std::cout << "Non finite number found in target at " << target_nan_idx << ": "
<< target[target_nan_idx] << std::endl; << target[target_nan_idx] << std::endl;
......
...@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test) ...@@ -80,7 +80,7 @@ TEST_CASE(mul_literal_round_test)
migraphx::target gpu_t = migraphx::make_target("gpu"); migraphx::target gpu_t = migraphx::make_target("gpu");
run_prog(p, gpu_t, m, gpu_result); run_prog(p, gpu_t, m, gpu_result);
EXPECT(migraphx::verify_range(ref_result, gpu_result)); EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
} }
int main(int argc, const char* argv[]) { test::run(argc, argv); } int main(int argc, const char* argv[]) { test::run(argc, argv); }
...@@ -64,7 +64,7 @@ TEST_CASE(host_same_buffer_copy) ...@@ -64,7 +64,7 @@ TEST_CASE(host_same_buffer_copy)
auto result = p.eval(pp).back(); auto result = p.eval(pp).back();
std::vector<float> results_vector(ss.elements(), -1); std::vector<float> results_vector(ss.elements(), -1);
result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); }); result.visit([&](auto output) { results_vector.assign(output.begin(), output.end()); });
EXPECT(migraphx::verify_range(c_vec, results_vector)); EXPECT(migraphx::verify::verify_range(c_vec, results_vector));
} }
TEST_CASE(arguments_lifetime) TEST_CASE(arguments_lifetime)
......
...@@ -52,7 +52,7 @@ TEST_CASE(gpu_target_copy) ...@@ -52,7 +52,7 @@ TEST_CASE(gpu_target_copy)
std::vector<int8_t> val_final; std::vector<int8_t> val_final;
ref_arg_final.visit([&](auto v) { val_final.assign(v.begin(), v.end()); }); ref_arg_final.visit([&](auto v) { val_final.assign(v.begin(), v.end()); });
EXPECT(migraphx::verify_range(val_orig, val_final)); EXPECT(migraphx::verify::verify_range(val_orig, val_final));
} }
TEST_CASE(int8_quantization) TEST_CASE(int8_quantization)
...@@ -118,9 +118,9 @@ TEST_CASE(int8_quantization) ...@@ -118,9 +118,9 @@ TEST_CASE(int8_quantization)
// the regular pipeline uses the rewrite_quantization in the much // the regular pipeline uses the rewrite_quantization in the much
// earlier stage. // earlier stage.
if(migraphx::gpu::mlir_enabled()) if(migraphx::gpu::mlir_enabled())
EXPECT(migraphx::verify_range(ref_result, gpu_result, 1e5)); EXPECT(migraphx::verify::verify_range(ref_result, gpu_result, 1e5));
else else
EXPECT(migraphx::verify_range(ref_result, gpu_result)); EXPECT(migraphx::verify::verify_range(ref_result, gpu_result));
} }
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment