Commit eb4dfb49 authored by charlie

Merge branch 'develop' of github.com:ROCmSoftwarePlatform/AMDMIGraphX into dyn_nms

parents 20394c35 2783c649
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
cmake_minimum_required(VERSION 3.5)
project (custom_hip_kernel)
set (CMAKE_CXX_STANDARD 14)
set (EXAMPLE custom_op_hip_kernel)
list (APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)
find_package (migraphx REQUIRED)
find_package (hip REQUIRED)
message("source file: " ${EXAMPLE}.cpp " ---> bin: " ${EXAMPLE})
add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
target_link_libraries(${EXAMPLE} migraphx::c hip::device)
# Custom HIP Kernel using MIGraphX API
This is an example of a custom operator implementation using MIGraphX's C/C++ APIs. It also demonstrates how to use this custom op in conjunction with the rest of the MIGraphX operators to build and run a MIGraphX program on the GPU.
Kernels can be written in HIP, with MIOpen, or with the rocBLAS library. This particular example uses **HIP**. A minimal sketch of the custom-op interface appears after the build steps below.
To build the example, ensure ROCm is installed at `/opt/rocm`.
1. `export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH`
2. `cd $MIGRAPHX_SRC/examples/migraphx/custom_op_hip_kernel/`
3. `mkdir build && cd build`
4. `CXX=/opt/rocm/llvm/bin/clang++ cmake .. && make`
5. `./custom_op_hip_kernel`
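
As a quick orientation before the full listing, here is a minimal sketch of the custom-op interface this example implements (the struct and op names are illustrative, not part of the API): every custom op derives from `migraphx::experimental_custom_op_base` and overrides `name`, `compute`, and `compute_shape`.

```cpp
#include <migraphx/migraphx.hpp> // MIGraphX's C++ API

// Minimal sketch of a custom op; the complete, runnable version is
// custom_op_hip_kernel.cpp below.
struct my_custom_op final : migraphx::experimental_custom_op_base
{
    std::string name() const override { return "my_custom_op"; }
    // Run the computation. By the convention used in these examples, the last
    // input argument is the pre-allocated output buffer, so it is returned.
    migraphx::argument
    compute(migraphx::context, migraphx::shape, migraphx::arguments inputs) const override
    {
        return inputs[inputs.size() - 1];
    }
    // Validate the input shapes and report the output shape.
    migraphx::shape compute_shape(migraphx::shapes inputs) const override
    {
        return inputs.back();
    }
};
```

The op is registered once with `migraphx::register_experimental_custom_op` and then referenced by name via `migraphx::operation`, as `main` in the full example shows.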
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <algorithm>
#include <cmath>
#include <hip/hip_runtime.h>
#include <migraphx/migraphx.hpp> // MIGraphX's C++ API
#include <numeric>
#define MIGRAPHX_HIP_ASSERT(x) (assert((x) == hipSuccess))
/*
* Square each element in the array A and write to array C.
*/
template <typename T>
__global__ void vector_square(T* C_d, const T* A_d, size_t N)
{
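    // Grid-stride loop: each thread starts at its global index and advances by
    // the total number of threads in the grid, so any N is covered regardless
    // of the launch configuration.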
size_t offset = (hipBlockIdx_x * hipBlockDim_x + hipThreadIdx_x);
size_t stride = hipBlockDim_x * hipGridDim_x;
for(size_t i = offset; i < N; i += stride)
{
C_d[i] = A_d[i] * A_d[i];
}
}
struct square_custom_op final : migraphx::experimental_custom_op_base
{
virtual std::string name() const override { return "square_custom_op"; }
virtual migraphx::argument
compute(migraphx::context ctx, migraphx::shape, migraphx::arguments inputs) const override
{
        // If the compile options have offload_copy = true, then parameters and outputs are
        // automatically copied to and from the GPU's memory. Here we assume the `inputs`
        // arguments are already on the GPU, so there is no need to call hipMalloc, hipFree,
        // or hipMemcpy. The last element in `inputs` is the output argument, so it is
        // returned from the compute method.
auto* input_buffer = reinterpret_cast<float*>(inputs[0].data());
auto* output_buffer = reinterpret_cast<float*>(inputs[1].data());
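        // Element count = total bytes / element size. Note that sizeof(...type()) is the
        // size of the datatype enum value, which happens to match sizeof(float) for this
        // float-only example.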
size_t n_elements = inputs[0].get_shape().bytes() / sizeof(inputs[0].get_shape().type());
MIGRAPHX_HIP_ASSERT(hipSetDevice(0));
const unsigned blocks = 512;
const unsigned threads_per_block = 256;
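        // Launch the kernel on the HIP stream MIGraphX is executing on
        // (ctx.get_queue<hipStream_t>() below), so it is ordered correctly with
        // the rest of the compiled program.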
// cppcheck-suppress UseDeviceLaunch
hipLaunchKernelGGL(vector_square,
dim3(blocks),
dim3(threads_per_block),
0,
ctx.get_queue<hipStream_t>(),
output_buffer,
input_buffer,
n_elements);
return inputs[1];
}
virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
{
if(inputs.size() != 2)
{
throw std::runtime_error("square_custom_op must have 2 arguments");
}
if(inputs[0] != inputs[1])
{
throw std::runtime_error("Inputs to the square_custom_op must have same Shape");
}
return inputs.back();
}
};
int main(int argc, const char* argv[])
{
square_custom_op square_op;
migraphx::register_experimental_custom_op(square_op);
migraphx::program p;
migraphx::shape s{migraphx_shape_float_type, {32, 256}};
migraphx::module m = p.get_main_module();
auto x = m.add_parameter("x", s);
auto neg_ins = m.add_instruction(migraphx::operation("neg"), x);
// add allocation for the custom_kernel's output buffer
auto alloc = m.add_allocation(s);
auto custom_kernel =
m.add_instruction(migraphx::operation("square_custom_op"), {neg_ins, alloc});
auto relu_ins = m.add_instruction(migraphx::operation("relu"), {custom_kernel});
m.add_return({relu_ins});
migraphx::compile_options options;
// set offload copy to true for GPUs
options.set_offload_copy();
p.compile(migraphx::target("gpu"), options);
migraphx::program_parameters pp;
std::vector<float> x_data(s.bytes() / sizeof(s.type()));
std::iota(x_data.begin(), x_data.end(), 0);
pp.add("x", migraphx::argument(s, x_data.data()));
auto results = p.eval(pp);
auto result = results[0];
std::vector<float> expected_result = x_data;
std::transform(expected_result.begin(),
expected_result.end(),
expected_result.begin(),
[](auto i) { return std::pow(i, 2); });
if(bool{result == migraphx::argument(s, expected_result.data())})
{
std::cout << "Successfully executed custom HIP kernel example\n";
}
else
{
std::cout << "Custom HIP kernel example failed\n";
}
return 0;
}
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
cmake_minimum_required(VERSION 3.5)
project (custom_miopen_kernel)
set (CMAKE_CXX_STANDARD 14)
set (EXAMPLE custom_op_miopen_kernel)
list (APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)
find_package (migraphx REQUIRED)
find_package (miopen REQUIRED)
message("source file: " ${EXAMPLE}.cpp " ---> bin: " ${EXAMPLE})
add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
target_link_libraries(${EXAMPLE} migraphx::c MIOpen)
# Custom MIOpen Kernel using MIGraphX API
This is an example of a custom operator implementation using MIGraphX's C/C++ APIs. It also demonstrates how to use this custom op in conjunction with the rest of the MIGraphX operators to build and run a MIGraphX program on the GPU.
Kernels can be written in HIP, with MIOpen, or with the rocBLAS library. This particular example uses **MIOpen** library calls.
To build and run the example, ensure ROCm is installed at `/opt/rocm`.
1. `export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH`
2. `cd $MIGRAPHX_SRC/examples/migraphx/custom_op_miopen_kernel/`
3. `mkdir build && cd build`
4. `cmake .. && make`
5. `./custom_op_miopen_kernel`
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <algorithm>
#include <cmath>
#include <hip/hip_runtime.h>
#include <migraphx/migraphx.h>
#include <miopen/miopen.h>
#include <migraphx/migraphx.hpp> // MIGraphX's C++ API
#include <numeric>
#include <stdexcept>
#define MIGRAPHX_MIOPEN_ASSERT(x) (assert((x) == miopenStatusSuccess))
#define MIGRAPHX_HIP_ASSERT(x) (assert((x) == hipSuccess))
inline miopenTensorDescriptor_t make_miopen_tensor(const migraphx::shape& s, bool pack = false)
{
miopenTensorDescriptor_t t;
MIGRAPHX_MIOPEN_ASSERT(miopenCreateTensorDescriptor(&t));
    // MIOpen tensor descriptors take int arrays, so convert the size_t lens and strides
auto s_lens = s.lengths();
std::vector<int> lens(s_lens.begin(), s_lens.end());
auto s_strides = s.strides();
std::vector<int> strides(s_strides.begin(), s_strides.end());
miopenDataType_t d;
if(s.type() == migraphx_shape_float_type)
d = miopenFloat;
else if(s.type() == migraphx_shape_half_type)
d = miopenHalf;
else if(s.type() == migraphx_shape_int32_type)
d = miopenInt32;
else if(s.type() == migraphx_shape_int8_type)
{
if(pack)
{
// update the lens and corresponding strides
d = miopenInt8x4;
lens[1] = ((lens[1] + 3) / 4) * 4;
strides[0] = strides[1] * lens[1];
}
else
{
d = miopenInt8;
}
}
else
{
throw("MAKE_TENSOR: unsupported type");
}
    MIGRAPHX_MIOPEN_ASSERT(miopenSetTensorDescriptor(t, d, s_lens.size(), lens.data(), strides.data()));
return t;
}
inline auto make_miopen_handle(migraphx::context& ctx)
{
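    // Bind MIOpen to the HIP stream MIGraphX is executing on, so the custom
    // MIOpen kernel is ordered correctly with the rest of the compiled program.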
MIGRAPHX_HIP_ASSERT(hipSetDevice(0));
auto* stream = ctx.get_queue<hipStream_t>();
miopenHandle_t out;
MIGRAPHX_MIOPEN_ASSERT(miopenCreateWithStream(&out, stream));
return out;
}
inline auto make_activation_descriptor(miopenActivationMode_t mode,
double alpha = 0,
double beta = 0,
double gamma = 0)
{
miopenActivationDescriptor_t ad;
MIGRAPHX_MIOPEN_ASSERT(miopenCreateActivationDescriptor(&ad));
    MIGRAPHX_MIOPEN_ASSERT(miopenSetActivationDescriptor(ad, mode, alpha, beta, gamma));
return ad;
}
struct abs_custom_op final : migraphx::experimental_custom_op_base
{
virtual std::string name() const override { return "abs_custom_op"; }
virtual migraphx::argument compute(migraphx::context ctx,
migraphx::shape output_shape,
migraphx::arguments args) const override
{
float alpha = 1;
float beta = 0;
        // MIOpen kernel calls take raw buffer pointers for the tensor data. Each buffer
        // pointer must be accompanied by a tensor descriptor giving its shape, type,
        // strides, and dimensionality. The `make_miopen_tensor` helper above builds such
        // descriptors to pass as parameters to the MIOpen kernel call.
auto y_desc = make_miopen_tensor(output_shape);
auto x_desc = make_miopen_tensor(args[0].get_shape());
// create MIOpen stream handle
auto miopen_handle = make_miopen_handle(ctx);
        // MIOpen has a generic kernel for many different kinds of activation functions.
        // Each such generic call must therefore be accompanied by a descriptor saying
        // which activation computation to perform.
auto ad = make_activation_descriptor(miopenActivationABS, 0, 0, 0);
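        // miopenActivationForward computes y = alpha * op(x) + beta * y; with
        // alpha = 1 and beta = 0 here, the output is simply abs(x).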
        MIGRAPHX_MIOPEN_ASSERT(miopenActivationForward(
            miopen_handle, ad, &alpha, x_desc, args[0].data(), &beta, y_desc, args[1].data()));
return args[1];
}
virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
{
if(inputs.size() != 2)
{
throw std::runtime_error("abs_custom_op must have two input arguments");
}
if(inputs[0] != inputs[1])
{
throw std::runtime_error("Input arguments to abs_custom_op must have same shape");
}
return inputs.back();
}
};
int main(int argc, const char* argv[])
{
abs_custom_op abs_op;
migraphx::register_experimental_custom_op(abs_op);
migraphx::program p;
migraphx::shape s{migraphx_shape_float_type, {32, 256}};
migraphx::module m = p.get_main_module();
auto x = m.add_parameter("x", s);
auto neg_ins = m.add_instruction(migraphx::operation("neg"), {x});
// add allocation for the custom_kernel's output buffer
auto alloc = m.add_allocation(s);
auto custom_kernel = m.add_instruction(migraphx::operation("abs_custom_op"), {neg_ins, alloc});
auto relu_ins = m.add_instruction(migraphx::operation("relu"), {custom_kernel});
m.add_return({relu_ins});
migraphx::compile_options options;
// set offload copy to true for GPUs
options.set_offload_copy();
p.compile(migraphx::target("gpu"), options);
migraphx::program_parameters prog_params;
std::vector<float> x_data(s.bytes() / sizeof(s.type()));
std::iota(x_data.begin(), x_data.end(), 0);
prog_params.add("x", migraphx::argument(s, x_data.data()));
auto results = p.eval(prog_params);
auto result = results[0];
std::vector<float> expected_result = x_data;
std::transform(expected_result.begin(),
expected_result.end(),
expected_result.begin(),
[](auto i) { return std::abs(i); });
if(bool{result == migraphx::argument(s, expected_result.data())})
{
std::cout << "Successfully executed custom MIOpen kernel example with MIGraphX\n";
}
else
{
std::cout << "Custom MIOpen kernel example failed\n";
}
return 0;
}
#####################################################################################
# The MIT License (MIT)
#
# Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
#####################################################################################
cmake_minimum_required(VERSION 3.5)
project (custom_rocblas_kernel)
set (CMAKE_CXX_STANDARD 14)
set (EXAMPLE custom_op_rocblas_kernel)
list (APPEND CMAKE_PREFIX_PATH /opt/rocm/hip /opt/rocm)
find_package (migraphx REQUIRED)
find_package (rocblas REQUIRED)
message("source file: " ${EXAMPLE}.cpp " ---> bin: " ${EXAMPLE})
add_executable(${EXAMPLE} ${EXAMPLE}.cpp)
target_link_libraries(${EXAMPLE} migraphx::c roc::rocblas)
# Custom rocBLAS Kernel using MIGraphX API
This is an example of a custom operator implementation using MIGraphX's C/C++ APIs. It also demonstrates how to use this custom op in conjunction with the rest of the MIGraphX operators to build and run a MIGraphX program on the GPU.
Kernels can be written in HIP, with MIOpen, or with the rocBLAS library. This particular example uses **rocBLAS** library calls.
To build and run the example, ensure ROCm is installed at `/opt/rocm`.
1. `export LD_LIBRARY_PATH=/opt/rocm/lib:$LD_LIBRARY_PATH`
2. `cd $MIGRAPHX_SRC/examples/migraphx/custom_op_rocblas_kernel/`
3. `mkdir build && cd build`
4. `cmake .. && make`
5. `./custom_op_rocblas_kernel`
/*
* The MIT License (MIT)
*
* Copyright (c) 2015-2022 Advanced Micro Devices, Inc. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include <algorithm>
#include <hip/hip_runtime.h>
#include <rocblas.h>
#include <migraphx/migraphx.h>
#include <migraphx/migraphx.hpp> // MIGraphX's C++ API
#include <numeric>
#include <stdexcept>
#define MIGRAPHX_ROCBLAS_ASSERT(x) (assert((x) == rocblas_status::rocblas_status_success))
#define MIGRAPHX_HIP_ASSERT(x) (assert((x) == hipSuccess))
rocblas_handle create_rocblas_handle_ptr()
{
rocblas_handle handle;
MIGRAPHX_ROCBLAS_ASSERT(rocblas_create_handle(&handle));
return rocblas_handle{handle};
}
rocblas_handle create_rocblas_handle_ptr(migraphx::context& ctx)
{
MIGRAPHX_HIP_ASSERT(hipSetDevice(0));
rocblas_handle rb = create_rocblas_handle_ptr();
auto* stream = ctx.get_queue<hipStream_t>();
MIGRAPHX_ROCBLAS_ASSERT(rocblas_set_stream(rb, stream));
return rb;
}
struct sscal_custom_op final : migraphx::experimental_custom_op_base
{
virtual std::string name() const override { return "sscal_custom_op"; }
virtual migraphx::argument compute(migraphx::context ctx,
migraphx::shape output_shape,
migraphx::arguments args) const override
{
// create rocblas stream handle
auto rocblas_handle = create_rocblas_handle_ptr(ctx);
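        // Note: for brevity this example never calls rocblas_destroy_handle; a
        // longer-lived program should destroy (or cache and reuse) the handle
        // instead of creating one per compute call.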
rocblas_int n = args[1].get_shape().lengths()[0];
float* alpha = reinterpret_cast<float*>(args[0].data());
float* vec_ptr = reinterpret_cast<float*>(args[1].data());
MIGRAPHX_ROCBLAS_ASSERT(rocblas_sscal(rocblas_handle, n, alpha, vec_ptr, 1));
return args[1];
}
virtual migraphx::shape compute_shape(migraphx::shapes inputs) const override
{
if(inputs.size() != 2)
{
throw std::runtime_error("sscal_custom_op must have 2 input arguments");
}
if(inputs[0].lengths().size() != 1 || inputs[0].lengths()[0] != 1)
{
throw std::runtime_error("first input argument to sscal_custom_op must be a scalar");
}
if(inputs[1].lengths().size() != 1)
{
throw std::runtime_error(
"second input argument to sscal_custom_op must be a vector with dimension one");
}
return inputs.back();
}
};
int main(int argc, const char* argv[])
{
// computes ReLU(neg(x) * scale)
sscal_custom_op sscal_op;
migraphx::register_experimental_custom_op(sscal_op);
migraphx::program p;
migraphx::shape x_shape{migraphx_shape_float_type, {8192}};
migraphx::shape scale_shape{migraphx_shape_float_type, {1}};
migraphx::module m = p.get_main_module();
auto x = m.add_parameter("x", x_shape);
auto scale = m.add_parameter("scale", scale_shape);
auto neg_ins = m.add_instruction(migraphx::operation("neg"), {x});
auto custom_kernel =
m.add_instruction(migraphx::operation("sscal_custom_op"), {scale, neg_ins});
auto relu_ins = m.add_instruction(migraphx::operation("relu"), {custom_kernel});
m.add_return({relu_ins});
migraphx::compile_options options;
// set offload copy to true for GPUs
options.set_offload_copy();
p.compile(migraphx::target("gpu"), options);
migraphx::program_parameters pp;
std::vector<float> x_data(x_shape.bytes() / sizeof(x_shape.type()));
std::vector<float> scale_data{-1};
std::iota(x_data.begin(), x_data.end(), 0);
pp.add("x", migraphx::argument(x_shape, x_data.data()));
pp.add("scale", migraphx::argument(scale_shape, scale_data.data()));
auto results = p.eval(pp);
auto result = results[0];
std::vector<float> expected_result = x_data;
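    // With scale = -1 the program computes relu((-x) * -1) = relu(x); the iota
    // input is non-negative, so the expected output equals x_data unchanged.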
if(bool{result == migraphx::argument(x_shape, expected_result.data())})
{
std::cout << "Successfully executed custom rocBLAS kernel example\n";
}
else
{
std::cout << "Custom rocBLAS kernel example failed\n";
}
return 0;
}
@@ -80,7 +80,7 @@
 "outputs": [],
 "source": [
 "if not os.path.exists(\"yolov4_fp16.mxr\"):\n",
-" !/opt/rocm/bin/migraphx-driver compile ./utilities/yolov4.onnx --gpu --enable-offload-copy --fp16ref --binary -o yolov4_fp16.mxr\n",
+" !/opt/rocm/bin/migraphx-driver compile ./utilities/yolov4.onnx --gpu --enable-offload-copy --fp16 --binary -o yolov4_fp16.mxr\n",
 "if not os.path.exists(\"yolov4.mxr\"):\n",
 " !/opt/rocm/bin/migraphx-driver compile ./utilities/yolov4.onnx --gpu --enable-offload-copy --binary -o yolov4.mxr"
 ]
 /*
  * The MIT License (MIT)
  *
@@ -21,10 +22,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
  * THE SOFTWARE.
  */
-#include <migraphx/operators.hpp>
+#include <migraphx/make_op.hpp>
 #include <migraphx/program.hpp>
 #include <migraphx/generate.hpp>
-#include <migraphx/apply_alpha_beta.hpp>
+#include <migraphx/json.hpp>
 #include "models.hpp"
 namespace migraphx {
@@ -34,173 +35,189 @@ inline namespace MIGRAPHX_INLINE_NS {
 migraphx::program alexnet(unsigned batch) // NOLINT(readability-function-size)
 {
     migraphx::program p;
-    auto* mm = p.get_main_module();
-    auto m0 =
-        mm->add_parameter("0", migraphx::shape{migraphx::shape::float_type, {batch, 3, 224, 224}});
-    auto mx0 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000}}, 0));
-    auto mx1 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000, 4096}}, 1));
-    auto mx2 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 2));
-    auto mx3 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 4096}}, 3));
-    auto mx4 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 4));
-    auto mx5 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 9216}}, 5));
-    auto mx6 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 6));
-    auto mx7 = mm->add_literal(migraphx::generate_literal(
-        migraphx::shape{migraphx::shape::float_type, {256, 256, 3, 3}}, 7));
-    auto mx8 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 8));
-    auto mx9 = mm->add_literal(migraphx::generate_literal(
-        migraphx::shape{migraphx::shape::float_type, {256, 384, 3, 3}}, 9));
-    auto mx10 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {384}}, 10));
-    auto mx11 = mm->add_literal(migraphx::generate_literal(
-        migraphx::shape{migraphx::shape::float_type, {384, 192, 3, 3}}, 11));
-    auto mx12 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {192}}, 12));
-    auto mx13 = mm->add_literal(migraphx::generate_literal(
-        migraphx::shape{migraphx::shape::float_type, {192, 64, 5, 5}}, 13));
-    auto mx14 = mm->add_literal(
-        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {64}}, 14));
-    auto mx15 = mm->add_literal(migraphx::generate_literal(
-        migraphx::shape{migraphx::shape::float_type, {64, 3, 11, 11}}, 15));
-    migraphx::op::convolution convolution16;
-    convolution16.padding = {2, 2};
-    convolution16.stride = {4, 4};
-    convolution16.dilation = {1, 1};
-    convolution16.group = 1;
-    auto mx16 = mm->add_instruction(convolution16, m0, mx15);
-    migraphx::op::broadcast broadcast17;
-    broadcast17.axis = 1;
-    broadcast17.broadcast_lens = {batch, 64, 55, 55};
-    auto mx17 = mm->add_instruction(broadcast17, mx14);
-    migraphx::op::add add18;
-    auto mx18 = mm->add_instruction(add18, mx16, mx17);
-    migraphx::op::relu relu19;
-    auto mx19 = mm->add_instruction(relu19, mx18);
-    migraphx::op::pooling pooling20;
-    pooling20.mode = migraphx::op::pooling_mode::max;
-    pooling20.padding = {0, 0};
-    pooling20.stride = {2, 2};
-    pooling20.lengths = {3, 3};
-    auto mx20 = mm->add_instruction(pooling20, mx19);
-    migraphx::op::convolution convolution21;
-    convolution21.padding = {2, 2};
-    convolution21.stride = {1, 1};
-    convolution21.dilation = {1, 1};
-    convolution21.group = 1;
-    auto mx21 = mm->add_instruction(convolution21, mx20, mx13);
-    migraphx::op::broadcast broadcast22;
-    broadcast22.axis = 1;
-    broadcast22.broadcast_lens = {batch, 192, 27, 27};
-    auto mx22 = mm->add_instruction(broadcast22, mx12);
-    migraphx::op::add add23;
-    auto mx23 = mm->add_instruction(add23, mx21, mx22);
-    migraphx::op::relu relu24;
-    auto mx24 = mm->add_instruction(relu24, mx23);
-    migraphx::op::pooling pooling25;
-    pooling25.mode = migraphx::op::pooling_mode::max;
-    pooling25.padding = {0, 0};
-    pooling25.stride = {2, 2};
-    pooling25.lengths = {3, 3};
-    auto mx25 = mm->add_instruction(pooling25, mx24);
-    migraphx::op::convolution convolution26;
-    convolution26.padding = {1, 1};
-    convolution26.stride = {1, 1};
-    convolution26.dilation = {1, 1};
-    convolution26.group = 1;
-    auto mx26 = mm->add_instruction(convolution26, mx25, mx11);
-    migraphx::op::broadcast broadcast27;
-    broadcast27.axis = 1;
-    broadcast27.broadcast_lens = {batch, 384, 13, 13};
-    auto mx27 = mm->add_instruction(broadcast27, mx10);
-    migraphx::op::add add28;
-    auto mx28 = mm->add_instruction(add28, mx26, mx27);
-    migraphx::op::relu relu29;
-    auto mx29 = mm->add_instruction(relu29, mx28);
-    migraphx::op::convolution convolution30;
-    convolution30.padding = {1, 1};
-    convolution30.stride = {1, 1};
-    convolution30.dilation = {1, 1};
-    convolution30.group = 1;
-    auto mx30 = mm->add_instruction(convolution30, mx29, mx9);
-    migraphx::op::broadcast broadcast31;
-    broadcast31.axis = 1;
-    broadcast31.broadcast_lens = {batch, 256, 13, 13};
-    auto mx31 = mm->add_instruction(broadcast31, mx8);
-    migraphx::op::add add32;
-    auto mx32 = mm->add_instruction(add32, mx30, mx31);
-    migraphx::op::relu relu33;
-    auto mx33 = mm->add_instruction(relu33, mx32);
-    migraphx::op::convolution convolution34;
-    convolution34.padding = {1, 1};
-    convolution34.stride = {1, 1};
-    convolution34.dilation = {1, 1};
-    convolution34.group = 1;
-    auto mx34 = mm->add_instruction(convolution34, mx33, mx7);
-    migraphx::op::broadcast broadcast35;
-    broadcast35.axis = 1;
-    broadcast35.broadcast_lens = {batch, 256, 13, 13};
-    auto mx35 = mm->add_instruction(broadcast35, mx6);
-    migraphx::op::add add36;
-    auto mx36 = mm->add_instruction(add36, mx34, mx35);
-    migraphx::op::relu relu37;
-    auto mx37 = mm->add_instruction(relu37, mx36);
-    migraphx::op::pooling pooling38;
-    pooling38.mode = migraphx::op::pooling_mode::max;
-    pooling38.padding = {0, 0};
-    pooling38.stride = {2, 2};
-    pooling38.lengths = {3, 3};
-    auto mx38 = mm->add_instruction(pooling38, mx37);
-    migraphx::op::flatten flatten39;
-    flatten39.axis = 1;
-    auto mx39 = mm->add_instruction(flatten39, mx38);
-    migraphx::op::identity identity40;
-    auto mx40 = mm->add_instruction(identity40, mx39);
-    migraphx::op::transpose transpose41;
-    transpose41.dims = {1, 0};
-    auto mx41 = mm->add_instruction(transpose41, mx5);
-    migraphx::op::multibroadcast multibroadcast42;
-    multibroadcast42.output_lens = {batch, 4096};
-    auto mx42 = mm->add_instruction(multibroadcast42, mx4);
-    float dot43_alpha = 1;
-    float dot43_beta = 1;
-    auto mx43 = migraphx::add_apply_alpha_beta(
-        *mm, {mx40, mx41, mx42}, migraphx::make_op("dot"), dot43_alpha, dot43_beta);
-    migraphx::op::relu relu44;
-    auto mx44 = mm->add_instruction(relu44, mx43);
-    migraphx::op::identity identity45;
-    auto mx45 = mm->add_instruction(identity45, mx44);
-    migraphx::op::transpose transpose46;
-    transpose46.dims = {1, 0};
-    auto mx46 = mm->add_instruction(transpose46, mx3);
-    migraphx::op::multibroadcast multibroadcast47;
-    multibroadcast47.output_lens = {batch, 4096};
-    auto mx47 = mm->add_instruction(multibroadcast47, mx2);
-    float dot48_alpha = 1;
-    float dot48_beta = 1;
-    auto mx48 = migraphx::add_apply_alpha_beta(
-        *mm, {mx45, mx46, mx47}, migraphx::make_op("dot"), dot48_alpha, dot48_beta);
-    migraphx::op::relu relu49;
-    auto mx49 = mm->add_instruction(relu49, mx48);
-    migraphx::op::transpose transpose50;
-    transpose50.dims = {1, 0};
-    auto mx50 = mm->add_instruction(transpose50, mx1);
-    migraphx::op::multibroadcast multibroadcast51;
-    multibroadcast51.output_lens = {batch, 1000};
-    auto mx51 = mm->add_instruction(multibroadcast51, mx0);
-    float dot52_alpha = 1;
-    float dot52_beta = 1;
-    migraphx::add_apply_alpha_beta(
-        *mm, {mx49, mx50, mx51}, migraphx::make_op("dot"), dot52_alpha, dot52_beta);
+    migraphx::module_ref mmain = p.get_main_module();
+    auto x_main_module_0 = mmain->add_literal(migraphx::abs(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 0)));
+    auto x_main_module_1 = mmain->add_literal(migraphx::abs(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 1)));
+    auto x_main_module_2 = mmain->add_literal(migraphx::abs(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1}}, 2)));
+    auto x_input_1 = mmain->add_parameter(
+        "input.1", migraphx::shape{migraphx::shape::float_type, {batch, 3, 224, 224}});
+    auto x_main_module_4 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 4096}}, 3));
+    auto x_main_module_5 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 4));
+    auto x_main_module_6 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096, 9216}}, 5));
+    auto x_main_module_7 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {4096}}, 6));
+    auto x_main_module_8 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000, 4096}}, 7));
+    auto x_main_module_9 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {1000}}, 8));
+    auto x_main_module_10 = mmain->add_literal(migraphx::generate_literal(
+        migraphx::shape{migraphx::shape::float_type, {256, 384, 3, 3}}, 9));
+    auto x_main_module_11 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 10));
+    auto x_main_module_12 = mmain->add_literal(migraphx::generate_literal(
+        migraphx::shape{migraphx::shape::float_type, {384, 192, 3, 3}}, 11));
+    auto x_main_module_13 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {384}}, 12));
+    auto x_main_module_14 = mmain->add_literal(migraphx::generate_literal(
+        migraphx::shape{migraphx::shape::float_type, {192, 64, 5, 5}}, 13));
+    auto x_main_module_15 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {192}}, 14));
+    auto x_main_module_16 = mmain->add_literal(migraphx::generate_literal(
+        migraphx::shape{migraphx::shape::float_type, {256, 256, 3, 3}}, 15));
+    auto x_main_module_17 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {256}}, 16));
+    auto x_main_module_18 = mmain->add_literal(migraphx::generate_literal(
+        migraphx::shape{migraphx::shape::float_type, {64, 3, 11, 11}}, 17));
+    auto x_main_module_19 = mmain->add_literal(
+        migraphx::generate_literal(migraphx::shape{migraphx::shape::float_type, {64}}, 18));
+    auto x_main_module_20 = mmain->add_instruction(
+        migraphx::make_op(
+            "convolution",
+            migraphx::from_json_string(
+                "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[4,4]}")),
+        x_input_1,
+        x_main_module_18);
+    auto x_main_module_21 = mmain->add_instruction(
+        migraphx::make_op("broadcast",
+                          migraphx::from_json_string("{axis:1,out_lens:[1,64,55,55]}")),
+        x_main_module_19);
+    auto x_main_module_22 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_20, x_main_module_21);
+    auto x_main_module_23 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_22);
+    auto x_main_module_24 = mmain->add_instruction(
+        migraphx::make_op(
+            "pooling",
+            migraphx::from_json_string(
+                "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}")),
+        x_main_module_23);
+    auto x_main_module_25 = mmain->add_instruction(
+        migraphx::make_op(
+            "convolution",
+            migraphx::from_json_string(
+                "{dilation:[1,1],group:1,padding:[2,2,2,2],padding_mode:0,stride:[1,1]}")),
+        x_main_module_24,
+        x_main_module_14);
+    auto x_main_module_26 = mmain->add_instruction(
+        migraphx::make_op("broadcast",
+                          migraphx::from_json_string("{axis:1,out_lens:[1,192,27,27]}")),
+        x_main_module_15);
+    auto x_main_module_27 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_25, x_main_module_26);
+    auto x_main_module_28 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_27);
+    auto x_main_module_29 = mmain->add_instruction(
+        migraphx::make_op(
+            "pooling",
+            migraphx::from_json_string(
+                "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}")),
+        x_main_module_28);
+    auto x_main_module_30 = mmain->add_instruction(
+        migraphx::make_op(
+            "convolution",
+            migraphx::from_json_string(
+                "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}")),
+        x_main_module_29,
+        x_main_module_12);
+    auto x_main_module_31 = mmain->add_instruction(
+        migraphx::make_op("broadcast",
+                          migraphx::from_json_string("{axis:1,out_lens:[1,384,13,13]}")),
+        x_main_module_13);
+    auto x_main_module_32 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_30, x_main_module_31);
+    auto x_main_module_33 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_32);
+    auto x_main_module_34 = mmain->add_instruction(
+        migraphx::make_op(
+            "convolution",
+            migraphx::from_json_string(
+                "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}")),
+        x_main_module_33,
+        x_main_module_10);
+    auto x_main_module_35 = mmain->add_instruction(
+        migraphx::make_op("broadcast",
+                          migraphx::from_json_string("{axis:1,out_lens:[1,256,13,13]}")),
+        x_main_module_11);
+    auto x_main_module_36 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_34, x_main_module_35);
+    auto x_main_module_37 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_36);
+    auto x_main_module_38 = mmain->add_instruction(
+        migraphx::make_op(
+            "convolution",
+            migraphx::from_json_string(
+                "{dilation:[1,1],group:1,padding:[1,1,1,1],padding_mode:0,stride:[1,1]}")),
+        x_main_module_37,
+        x_main_module_16);
+    auto x_main_module_39 = mmain->add_instruction(
+        migraphx::make_op("broadcast",
+                          migraphx::from_json_string("{axis:1,out_lens:[1,256,13,13]}")),
+        x_main_module_17);
+    auto x_main_module_40 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_38, x_main_module_39);
+    auto x_main_module_41 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_40);
+    auto x_main_module_42 = mmain->add_instruction(
+        migraphx::make_op(
+            "pooling",
+            migraphx::from_json_string(
+                "{ceil_mode:0,lengths:[3,3],lp_order:2,mode:1,padding:[0,0,0,0],stride:[2,2]}")),
+        x_main_module_41);
+    auto x_main_module_43 = mmain->add_instruction(
+        migraphx::make_op("reshape", migraphx::from_json_string("{dims:[1,9216]}")),
+        x_main_module_42);
+    auto x_main_module_44 = mmain->add_instruction(
+        migraphx::make_op("transpose", migraphx::from_json_string("{permutation:[1,0]}")),
+        x_main_module_6);
+    auto x_main_module_45 =
+        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_43, x_main_module_44);
+    auto x_main_module_46 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,4096]}")),
+        x_main_module_7);
+    auto x_main_module_47 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,4096]}")),
+        x_main_module_2);
+    auto x_main_module_48 =
+        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_46, x_main_module_47);
+    auto x_main_module_49 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_45, x_main_module_48);
+    auto x_main_module_50 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_49);
+    auto x_main_module_51 = mmain->add_instruction(
+        migraphx::make_op("transpose", migraphx::from_json_string("{permutation:[1,0]}")),
+        x_main_module_4);
+    auto x_main_module_52 =
+        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_50, x_main_module_51);
+    auto x_main_module_53 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,4096]}")),
+        x_main_module_5);
+    auto x_main_module_54 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,4096]}")),
+        x_main_module_1);
+    auto x_main_module_55 =
+        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_53, x_main_module_54);
+    auto x_main_module_56 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_52, x_main_module_55);
+    auto x_main_module_57 = mmain->add_instruction(migraphx::make_op("relu"), x_main_module_56);
+    auto x_main_module_58 = mmain->add_instruction(
+        migraphx::make_op("transpose", migraphx::from_json_string("{permutation:[1,0]}")),
+        x_main_module_8);
+    auto x_main_module_59 =
+        mmain->add_instruction(migraphx::make_op("dot"), x_main_module_57, x_main_module_58);
+    auto x_main_module_60 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,1000]}")),
+        x_main_module_9);
+    auto x_main_module_61 = mmain->add_instruction(
+        migraphx::make_op("multibroadcast", migraphx::from_json_string("{out_lens:[1,1000]}")),
+        x_main_module_0);
+    auto x_main_module_62 =
+        mmain->add_instruction(migraphx::make_op("mul"), x_main_module_60, x_main_module_61);
+    auto x_main_module_63 =
+        mmain->add_instruction(migraphx::make_op("add"), x_main_module_59, x_main_module_62);
+    mmain->add_return({x_main_module_63});
     return p;
 }
 } // namespace MIGRAPHX_INLINE_NS
 } // namespace driver
 } // namespace migraphx
@@ -210,6 +210,9 @@ struct loader
         auto last = std::prev(mm->end(), trim);
         mm->remove_instructions(last, mm->end());
     }
+    // Remove unused variable when exporting to cpp
+    if(output_type == "cpp")
+        migraphx::run_passes(*p.get_main_module(), {migraphx::dead_code_elimination{}});
     if(optimize)
     {
         migraphx::run_passes(*p.get_main_module(),
@@ -142,7 +142,7 @@ static std::vector<instruction_ref> append_pointwise_module(instruction_ref ins,
             input_map[input] = map_ins[param];
         }
     }
-    pm->replace_return(pm->insert_module_instructions(last, xm, map_ins));
+    pm->replace_return(pm->insert_instructions(last, xm, map_ins));
     return inputs;
 }
@@ -120,9 +120,33 @@ struct module
     instruction_ref move_instructions(instruction_ref src, instruction_ref dst);
-    std::vector<instruction_ref>
-    insert_module_instructions(instruction_ref ins,
-                               module_ref m,
-                               std::unordered_map<instruction_ref, instruction_ref> map_ins = {});
+    std::vector<instruction_ref>
+    add_instructions(const std::vector<instruction_ref>& instructions,
+                     std::unordered_map<instruction_ref, instruction_ref> map_ins = {});
+    std::vector<instruction_ref>
+    add_instructions(module_ref m,
+                     std::unordered_map<instruction_ref, instruction_ref> map_ins = {});
+    std::vector<instruction_ref>
+    add_instructions(instruction_ref start,
+                     instruction_ref last,
+                     std::unordered_map<instruction_ref, instruction_ref> map_ins = {});
+    std::vector<instruction_ref>
+    insert_instructions(instruction_ref ins,
+                        const std::vector<instruction_ref>& instructions,
+                        std::unordered_map<instruction_ref, instruction_ref> map_ins = {});
+    std::vector<instruction_ref>
+    insert_instructions(instruction_ref ins,
+                        module_ref m,
+                        std::unordered_map<instruction_ref, instruction_ref> map_ins = {});
+    std::vector<instruction_ref>
+    insert_instructions(instruction_ref ins,
+                        instruction_ref start,
+                        instruction_ref last,
+                        std::unordered_map<instruction_ref, instruction_ref> map_ins = {});
     template <class... Ts>
     instruction_ref add_literal(Ts&&... xs)
@@ -179,7 +203,9 @@ struct module
     void print_cpp(std::ostream& os) const;
     std::unordered_map<instruction_ref, std::string>
-    print_cpp(std::ostream& os, std::unordered_map<instruction_ref, std::string> names) const;
+    print_cpp(std::ostream& os,
+              const std::string& mname,
+              std::unordered_map<instruction_ref, std::string> names) const;
     void annotate(std::ostream& os, std::function<void(instruction_ref)> a) const;
@@ -196,43 +196,64 @@ struct nonmaxsuppression
         return boxes_heap;
     }
-    template <class H, class S>
-    void select_boxes(H& boxes_heap,
-                      std::vector<std::pair<double, int64_t>>& selected_boxes_inside_class,
-                      std::vector<int64_t>& selected_indices,
-                      S batch_boxes_start,
-                      std::size_t max_output_boxes_per_class,
-                      double iou_threshold,
-                      std::size_t batch_idx,
-                      std::size_t class_idx) const
+    template <class Output, class Boxes, class Scores>
+    std::size_t compute_nms(Output output,
+                            Boxes boxes,
+                            Scores scores,
+                            const shape& max_output_shape,
+                            std::size_t max_output_boxes_per_class,
+                            double iou_threshold,
+                            double score_threshold) const
     {
-        selected_boxes_inside_class.clear();
-        // Get the next box with top score, filter by iou_threshold
-        while(!boxes_heap.empty() &&
-              selected_boxes_inside_class.size() < max_output_boxes_per_class)
-        {
-            // Check with existing selected boxes for this class, remove box if it
-            // exceeds the IOU (Intersection Over Union) threshold
-            const auto next_top_score = boxes_heap.top();
-            bool not_selected =
-                std::any_of(selected_boxes_inside_class.begin(),
-                            selected_boxes_inside_class.end(),
-                            [&](auto selected_index) {
-                                return this->suppress_by_iou(
-                                    batch_box(batch_boxes_start, next_top_score.second),
-                                    batch_box(batch_boxes_start, selected_index.second),
-                                    iou_threshold);
-                            });
-            if(not not_selected)
-            {
-                selected_boxes_inside_class.push_back(next_top_score);
-                selected_indices.push_back(batch_idx);
-                selected_indices.push_back(class_idx);
-                selected_indices.push_back(next_top_score.second);
-            }
-            boxes_heap.pop();
-        }
-    }
+        std::fill(output.begin(), output.end(), 0);
+        const auto& lens       = scores.get_shape().lens();
+        const auto num_batches = lens[0];
+        const auto num_classes = lens[1];
+        const auto num_boxes   = lens[2];
+        // boxes of a class with NMS applied [score, index]
+        std::vector<std::pair<double, int64_t>> selected_boxes_inside_class;
+        std::vector<int64_t> selected_indices;
+        selected_boxes_inside_class.reserve(max_output_shape.elements());
+        // iterate over batches and classes
+        shape comp_s{shape::double_type, {num_batches, num_classes}};
+        shape_for_each(comp_s, [&](auto idx) {
+            auto batch_idx = idx[0];
+            auto class_idx = idx[1];
+            // index offset for this class
+            auto scores_start = scores.begin() + (batch_idx * num_classes + class_idx) * num_boxes;
+            // iterator to first value of this batch
+            auto batch_boxes_start = boxes.begin() + batch_idx * num_boxes * 4;
+            auto boxes_heap = filter_boxes_by_score(scores_start, num_boxes, score_threshold);
+            selected_boxes_inside_class.clear();
+            // Get the next box with top score, filter by iou_threshold
+            while(!boxes_heap.empty() &&
+                  selected_boxes_inside_class.size() < max_output_boxes_per_class)
+            {
+                // Check with existing selected boxes for this class, remove box if it
+                // exceeds the IOU (Intersection Over Union) threshold
+                const auto next_top_score = boxes_heap.top();
+                bool not_selected =
+                    std::any_of(selected_boxes_inside_class.begin(),
+                                selected_boxes_inside_class.end(),
+                                [&](auto selected_index) {
+                                    return this->suppress_by_iou(
+                                        batch_box(batch_boxes_start, next_top_score.second),
+                                        batch_box(batch_boxes_start, selected_index.second),
+                                        iou_threshold);
+                                });
+                if(not not_selected)
+                {
+                    selected_boxes_inside_class.push_back(next_top_score);
+                    selected_indices.push_back(batch_idx);
+                    selected_indices.push_back(class_idx);
+                    selected_indices.push_back(next_top_score.second);
+                }
+                boxes_heap.pop();
+            }
+        });
+        std::copy(selected_indices.begin(), selected_indices.end(), output.begin());
+        return selected_indices.size() / 3;
+    }
     argument compute(const shape& output_shape, std::vector<argument> args) const
@@ -253,40 +274,15 @@ struct nonmaxsuppression
         result.visit([&](auto output) {
             visit_all(args[0], args[1])([&](auto boxes, auto scores) {
-                const auto& lens       = scores.get_shape().lens();
-                const auto num_batches = lens[0];
-                const auto num_classes = lens[1];
-                const auto num_boxes   = lens[2];
-                // boxes of a class with NMS applied [score, index]
-                std::vector<std::pair<double, int64_t>> selected_boxes_inside_class;
-                std::vector<int64_t> selected_indices;
-                selected_boxes_inside_class.reserve(max_output_shape.elements());
-                // iterate over batches and classes
-                shape comp_s{shape::double_type, {num_batches, num_classes}};
-                shape_for_each(comp_s, [&](auto idx) {
-                    auto batch_idx = idx[0];
-                    auto class_idx = idx[1];
-                    // index offset for this class
-                    auto scores_start =
-                        scores.begin() + (batch_idx * num_classes + class_idx) * num_boxes;
-                    // iterator to first value of this batch
-                    auto batch_boxes_start = boxes.begin() + batch_idx * num_boxes * 4;
-                    auto boxes_heap =
-                        filter_boxes_by_score(scores_start, num_boxes, score_threshold);
-                    select_boxes(boxes_heap,
-                                 selected_boxes_inside_class,
-                                 selected_indices,
-                                 batch_boxes_start,
-                                 max_output_boxes_per_class,
-                                 iou_threshold,
-                                 batch_idx,
-                                 class_idx);
-                });
-                std::copy(selected_indices.begin(), selected_indices.end(), output.begin());
-                num_selected = selected_indices.size() / 3;
+                num_selected = compute_nms(output,
+                                           boxes,
+                                           scores,
+                                           max_output_shape,
+                                           max_output_boxes_per_class,
+                                           iou_threshold,
+                                           score_threshold);
             });
         });
         return result.reshape({output_shape.type(), {num_selected, 3}});
     }
 };
@@ -44,8 +44,8 @@ auto with_char(F f)
     return [=](unsigned char c) -> bool { return f(c); };
 }
-inline std::string
-replace_string(std::string subject, const std::string& search, const std::string& replace)
+inline void
+replace_string_inplace(std::string& subject, const std::string& search, const std::string& replace)
 {
     size_t pos = 0;
     while((pos = subject.find(search, pos)) != std::string::npos)
@@ -53,6 +53,12 @@ replace_string(std::string subject, const std::string& search, const std::string& replace)
         subject.replace(pos, search.length(), replace);
         pos += replace.length();
     }
+}
+
+inline std::string
+replace_string(std::string subject, const std::string& search, const std::string& replace)
+{
+    replace_string_inplace(subject, search, replace);
     return subject;
 }
@@ -35,7 +35,7 @@ static void inline_submodule(module& m, instruction_ref ins, bool cond)
 {
     const auto& mod_inputs = ins->module_inputs();
     module_ref smod        = cond ? mod_inputs.at(0) : mod_inputs.at(1);
-    auto mod_outputs       = m.insert_module_instructions(ins, smod);
+    auto mod_outputs       = m.insert_instructions(ins, smod);
     auto ins_outputs       = ins->outputs();
     assert(mod_outputs.size() >= ins_outputs.size());
@@ -35,6 +35,7 @@
 #include <migraphx/make_op.hpp>
 #include <migraphx/register_target.hpp>
 #include <migraphx/make_op.hpp>
+#include <migraphx/json.hpp>
 #include <iostream>
 #include <sstream>
 #include <algorithm>
@@ -196,6 +197,62 @@ void module::assign(const module& m)
     }
 }
+template <class Range>
+static std::vector<instruction_ref>
+insert_generic_instructions(module& m,
+                            instruction_ref ins,
+                            Range&& instructions,
+                            std::unordered_map<instruction_ref, instruction_ref> map_ins)
+{
+    assert(m.has_instruction(ins) or is_end(ins, m.end()));
+    std::vector<instruction_ref> mod_outputs;
+    instruction_ref last;
+    for(instruction_ref sins : instructions)
+    {
+        last = sins;
+        if(contains(map_ins, sins))
+            continue;
+        instruction_ref copy_ins;
+        if(sins->name() == "@literal")
+        {
+            auto l   = sins->get_literal();
+            copy_ins = m.add_literal(l);
+        }
+        else if(sins->name() == "@param")
+        {
+            auto&& name = any_cast<builtin::param>(sins->get_operator()).parameter;
+            auto s      = sins->get_shape();
+            copy_ins    = m.add_parameter(name, s);
+        }
+        else if(sins->name() == "@outline")
+        {
+            auto s   = sins->get_shape();
+            copy_ins = m.add_outline(s);
+        }
+        else
+        {
+            auto mod_args = sins->module_inputs();
+            auto inputs   = sins->inputs();
+            std::vector<instruction_ref> copy_inputs(inputs.size());
+            std::transform(inputs.begin(), inputs.end(), copy_inputs.begin(), [&](auto i) {
+                return contains(map_ins, i) ? map_ins[i] : i;
+            });
+            if(sins->name() == "@return")
+            {
+                mod_outputs = copy_inputs;
+                break;
+            }
+            copy_ins = m.insert_instruction(ins, sins->get_operator(), copy_inputs, mod_args);
+        }
+        map_ins[sins] = copy_ins;
+    }
+    if(mod_outputs.empty() and instructions.begin() != instructions.end())
+        mod_outputs = {map_ins.at(last)};
+    return mod_outputs;
+}
 instruction_ref module::add_instruction(const operation& op, std::vector<instruction_ref> args)
 {
     return insert_instruction(impl->instructions.end(), op, std::move(args));
@@ -334,53 +391,49 @@ instruction_ref module::move_instructions(instruction_ref src, instruction_ref dst)
     return src;
 }
-std::vector<instruction_ref> module::insert_module_instructions(
-    instruction_ref ins, module_ref m, std::unordered_map<instruction_ref, instruction_ref> map_ins)
-{
-    std::vector<instruction_ref> mod_outputs;
-    for(auto sins : iterator_for(*m))
-    {
-        if(contains(map_ins, sins))
-            continue;
-        instruction_ref copy_ins;
-        if(sins->name() == "@literal")
-        {
-            auto l   = sins->get_literal();
-            copy_ins = this->add_literal(l);
-        }
-        else if(sins->name() == "@param")
-        {
-            auto&& name = any_cast<builtin::param>(sins->get_operator()).parameter;
-            auto s      = sins->get_shape();
-            copy_ins    = this->add_parameter(name, s);
-        }
-        else if(sins->name() == "@outline")
-        {
-            auto s   = sins->get_shape();
-            copy_ins = this->add_outline(s);
-        }
-        else
-        {
-            auto mod_args = sins->module_inputs();
-            auto inputs   = sins->inputs();
-            std::vector<instruction_ref> copy_inputs(inputs.size());
-            std::transform(inputs.begin(), inputs.end(), copy_inputs.begin(), [&](auto i) {
-                return contains(map_ins, i) ? map_ins[i] : i;
-            });
-            if(sins->name() == "@return")
-            {
-                mod_outputs = copy_inputs;
-                break;
-            }
-            copy_ins = this->insert_instruction(ins, sins->get_operator(), copy_inputs, mod_args);
-        }
-        map_ins[sins] = copy_ins;
-    }
-    if(mod_outputs.empty())
-        mod_outputs = {map_ins.at(std::prev(m->end()))};
-    return mod_outputs;
-}
+std::vector<instruction_ref>
+module::add_instructions(const std::vector<instruction_ref>& instructions,
+                         std::unordered_map<instruction_ref, instruction_ref> map_ins)
+{
+    return this->insert_instructions(this->end(), instructions, std::move(map_ins));
+}
+std::vector<instruction_ref>
+module::add_instructions(module_ref m, std::unordered_map<instruction_ref, instruction_ref> map_ins)
+{
+    return this->insert_instructions(this->end(), m, std::move(map_ins));
+}
+std::vector<instruction_ref>
+module::add_instructions(instruction_ref start,
+                         instruction_ref last,
+                         std::unordered_map<instruction_ref, instruction_ref> map_ins)
+{
+    return this->insert_instructions(this->end(), start, last, std::move(map_ins));
+}
+std::vector<instruction_ref>
+module::insert_instructions(instruction_ref ins,
+                            const std::vector<instruction_ref>& instructions,
+                            std::unordered_map<instruction_ref, instruction_ref> map_ins)
+{
+    return insert_generic_instructions(*this, ins, instructions, std::move(map_ins));
+}
+std::vector<instruction_ref> module::insert_instructions(
+    instruction_ref ins, module_ref m, std::unordered_map<instruction_ref, instruction_ref> map_ins)
+{
+    return insert_generic_instructions(*this, ins, iterator_for(*m), std::move(map_ins));
+}
+std::vector<instruction_ref>
+module::insert_instructions(instruction_ref ins,
+                            instruction_ref start,
+                            instruction_ref last,
+                            std::unordered_map<instruction_ref, instruction_ref> map_ins)
+{
+    auto r = range(start, last);
+    return insert_generic_instructions(*this, ins, iterator_for(r), std::move(map_ins));
+}
 instruction_ref module::add_literal(literal l)
@@ -706,44 +759,33 @@ void module::print_graph(std::ostream& os, bool brief) const
     os << "}" << std::endl;
 }
-static std::string cpp_var_name(const std::string& name)
+static std::string to_c_id(const std::string& name, char rep = '_')
 {
-    return "m" + replace_string(name, "@", "x");
+    std::string id = transform_string(name, [&](auto c) {
+        if(with_char(::isalnum)(c) or c == '_')
+            return c;
+        return rep;
+    });
+    while(contains(id, "__"))
+        replace_string_inplace(id, "__", "_");
+    return id;
 }
-static std::string cpp_op_var(const std::string& name, instruction_ref ins)
+static std::string cpp_var_name(const std::string& name)
 {
-    return replace_string(name, "@", ins->name());
+    return to_c_id("x_" + replace_string(name, ":", "_module_"));
 }
-static void print_op_attributes(std::ostream& os, const std::string& name, const operation& op)
+static void print_make_op(std::ostream& os, const operation& op)
 {
-    std::string x = to_string(op);
-    if(contains(x, "["))
+    os << "migraphx::make_op(" << enclose_name(op.name());
+    auto v = op.to_value();
+    if(not v.empty())
     {
-        auto start = x.find('[');
-        auto end   = x.find(']');
-        std::string attribute_text = x.substr(start + 1, end - start - 1);
-        std::vector<std::string> attributes;
-        for(auto&& attribute : split_string(attribute_text, ','))
-        {
-            if(contains(attribute, '='))
-                attributes.push_back(attribute);
-            else
-                attributes.back() += "," + attribute;
-        }
-        for(auto&& attribute : attributes)
-        {
-            auto p     = split_string(attribute, '=');
-            auto key   = p.front();
-            auto value = p.back();
-            if(contains({"bn_mode", "padding_mode"}, key))
-                continue;
-            if(key == "mode")
-                value = enclose_name(trim(value));
-            os << name << "." << key << " = " << value << ";" << std::endl;
-        }
+        os << ", "
+           << "migraphx::from_json_string(" << enclose_name(to_json_string(v)) << ")";
     }
+    os << ")";
 }
 static void print_cpp_shape(std::ostream& os, const migraphx::shape& s)
@@ -756,22 +798,25 @@ static void print_cpp_shape(std::ostream& os, const migraphx::shape& s)
 }
 std::unordered_map<instruction_ref, std::string>
-module::print_cpp(std::ostream& os, std::unordered_map<instruction_ref, std::string> names) const
+module::print_cpp(std::ostream& os,
+                  const std::string& mname,
+                  std::unordered_map<instruction_ref, std::string> names) const
 {
-    os << "migraphx::module p;" << std::endl;
-    unsigned long seed = 0;
+    // cppcheck-suppress variableScope
+    unsigned long seed = names.size();
+    auto last          = std::prev(this->end());
     names = this->print(
         [&](auto ins, auto ins_names) {
-            auto op = cpp_op_var(ins_names.at(ins), ins);
-            if(ins->name().front() != '@')
-            {
-                os << "migraphx::op::" << ins->name() << " " << op << ";" << std::endl;
-                print_op_attributes(os, op, ins->get_operator());
-            }
-            os << "auto " << cpp_var_name(ins_names.at(ins)) << " = ";
+            std::vector<std::string> input_vars;
+            std::transform(ins->inputs().begin(),
+                           ins->inputs().end(),
+                           std::back_inserter(input_vars),
+                           [&](auto input) { return cpp_var_name(ins_names.at(input)); });
+            if(ins != last)
+                os << "auto " << cpp_var_name(ins_names.at(ins)) << " = ";
            if(ins->name() == "@literal")
             {
-                os << "p.add_literal(";
+                os << mname << "->add_literal(";
                 bool use_abs = false;
                 ins->get_literal().visit([&](auto v) {
                     use_abs = std::none_of(v.begin(), v.end(), [](auto x) { return x < 0; });
@@ -789,17 +834,22 @@ module::print_cpp(std::ostream& os, std::unordered_map<instruction_ref, std::string> names) const
             else if(ins->name() == "@param")
             {
                 std::string name = any_cast<builtin::param>(ins->get_operator()).parameter;
-                os << "p.add_parameter(" << enclose_name(name) << ",";
+                os << mname << "->add_parameter(" << enclose_name(name) << ",";
                 print_cpp_shape(os, ins->get_shape());
                 os << ");" << std::endl;
             }
+            else if(ins->name() == "@return")
+            {
+                os << mname << "->add_return({";
+                os << join_strings(input_vars, ", ");
+                os << "});" << std::endl;
+            }
             else
             {
-                os << "p.add_instruction(" << op;
-                for(auto input : ins->inputs())
-                {
-                    os << ", " << cpp_var_name(ins_names.at(input));
-                }
+                assert(ins->name().front() != '@');
+                os << mname << "->add_instruction(";
+                print_make_op(os, ins->get_operator());
+                os << ", " << join_strings(input_vars, ", ");
                 os << ");" << std::endl;
             }
         },
@@ -808,7 +858,7 @@ module::print_cpp(std::ostream& os, std::unordered_map<instruction_ref, std::string> names) const
     return names;
 }
-void module::print_cpp(std::ostream& os) const { this->print_cpp(os, {}); }
+void module::print_cpp(std::ostream& os) const { this->print_cpp(os, this->name(), {}); }
 void module::annotate(std::ostream& os, std::function<void(instruction_ref)> a) const
 {