/******************************************************************************* * * MIT License * * Copyright (c) 2017 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * *******************************************************************************/ #ifndef GUARD_OLC_HIPOC_KERNEL_HPP #define GUARD_OLC_HIPOC_KERNEL_HPP #include #include #include #include #include #include #include #include #include namespace olCompile { using HipEventPtr = OLC_MANAGE_PTR(hipEvent_t, hipEventDestroy); inline HipEventPtr make_hip_event() { hipEvent_t result = nullptr; MY_HIP_CHECK(hipEventCreate(&result)); return HipEventPtr{result}; } template struct KernelArgsPair { static const int alignment = sizeof(U); static const int padding = (alignment - (sizeof(T) % alignment)) % alignment; static const int second_index = sizeof(T) + padding; KernelArgsPair(T x, U y) { new(buffer) T(x); // NOLINT (clang-analyzer-cplusplus.PlacementNew) new(buffer + second_index) U(y); } char buffer[second_index + sizeof(U)] = {}; }; template struct KernelArgsPack; template struct KernelArgsPack { using data_t = KernelArgsPack, Ts...>; KernelArgsPack(T x, U y, Ts... xs) : data(KernelArgsPair(x, y), xs...) {} data_t data; }; template struct KernelArgsPack { KernelArgsPack(T x) : head(x) {} T head; }; template struct KernelArgs { KernelArgs(Ts... xs) : pack(xs...) { std::fill(std::begin(hidden), std::end(hidden), 0); } KernelArgsPack pack; uint64_t hidden[6] = {}; }; struct HIPOCKernelInvoke { hipStream_t stream = nullptr; hipFunction_t fun = nullptr; std::array ldims = {}; std::array gdims = {}; std::string name; std::function callback; // Workaround for aggregate types in c++11 HIPOCKernelInvoke() {} HIPOCKernelInvoke(hipStream_t pstream, hipFunction_t pfun, std::array pldims, std::array pgdims, std::string pname, std::function pcallback) : stream(pstream), fun(pfun), ldims(pldims), gdims(pgdims), name(pname), callback(pcallback) { } void operator()(std::vector& any_args) const { char hip_args[256] = {0}; auto sz_left = any_args[0].size(); memcpy(hip_args, &(any_args[0].buffer[0]), any_args[0].size()); for(unsigned long idx = 1; idx < any_args.size(); idx++) { auto& any_arg = any_args[idx]; unsigned long alignment = any_arg.size(); unsigned long padding = (alignment - (sz_left % alignment)) % alignment; unsigned long second_index = sz_left + padding; memcpy(hip_args + second_index, &(any_arg.buffer[0]), any_arg.size()); sz_left = second_index + alignment; } run(hip_args, sz_left); } template void operator()(Ts... xs) const { KernelArgs args{xs...}; run(&args, sizeof(args)); } void run(void* args, std::size_t size) const; const std::string& GetName() const { return name; } }; struct HIPOCKernel { HIPOCProgram program; std::string name; std::array ldims = {}; std::array gdims = {}; std::string kernel_module; hipFunction_t fun = nullptr; HIPOCKernel() {} HIPOCKernel(HIPOCProgram p, const std::string kernel_name, std::vector local_dims, std::vector global_dims) : program(p), name(kernel_name) { assert(!local_dims.empty() && local_dims.size() <= 3); assert(!global_dims.empty() && global_dims.size() <= 3); ldims.fill(1); gdims.fill(1); std::copy(local_dims.begin(), local_dims.end(), ldims.begin()); std::copy(global_dims.begin(), global_dims.end(), gdims.begin()); kernel_module = name; MY_HIP_CHECK(hipModuleGetFunction(&fun, program.GetModule(), kernel_module.c_str())); } HIPOCKernelInvoke Invoke(hipStream_t stream, std::function callback = nullptr) const; }; } // namespace olCompile #endif