Unverified Commit d1db6a0c authored by Chao Liu's avatar Chao Liu Committed by GitHub
Browse files

Absolute include path (#281)

* add gelu and fast_gelu

* added GeLU and fast GeLU

* clean up

* add gemm+fastgelu example

* add gemm+gelu instances

* update profiler

* clean up

* clean up

* adding gemm+bias+activation

* clean

* adding bias

* clean

* adding gemm multiple d

* debugging

* add gemm bias add fastgelu

* rename, clean

* refactoring; add readme

* refactor

* refactor

* refactor

* refactor

* refactor

* refactor

* fix

* fix

* update example

* update example

* rename

* update example

* add ckProfiler

* clean

* clean

* clean

* clean

* add client app example

* update readme

* delete obsolete files

* remove old client app

* delete old file

* cleaning

* clean

* remove half

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path for all examples

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* revert client app example

* clean build

* fix build

* temporarily disable client test on Jenkins

* clean

* clean

* clean
parent a49115b9
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <half.hpp>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "../gemm/gemm_util.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_gemm.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_dl.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "test/gemm/gemm_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
#ifndef GEMM_UTILS_HPP #pragma once
#define GEMM_UTILS_HPP
#include "check_err.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "device.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "tensor_layout.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
namespace ck { namespace ck {
namespace gemm_util { namespace gemm_util {
...@@ -350,4 +349,3 @@ struct TestGemmBF16 ...@@ -350,4 +349,3 @@ struct TestGemmBF16
} // namespace gemm_util } // namespace gemm_util
} // namespace ck } // namespace ck
#endif
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <half.hpp>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "gemm_util.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_gemm.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp" #include "test/gemm/gemm_util.hpp"
#include "gemm_specialization.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <half.hpp>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "gemm_util.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_gemm.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_tensor.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "gemm_specialization.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "test/gemm/gemm_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <half.hpp>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "gemm_util.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor_generator.hpp"
#include "host_gemm.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "test/gemm/gemm_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <half.hpp>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "gemm_util.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor_generator.hpp"
#include "host_gemm.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "reference_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "gemm_specialization.hpp"
#include "test/gemm/gemm_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
#include <algorithm> #include <algorithm>
#include <cstdlib> #include <cstdlib>
#include <half.hpp>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <tuple> #include <tuple>
#include <vector> #include <vector>
#include "gemm_util.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/device_gemm.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor_generator.hpp"
#include "host_gemm.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp" #include "test/gemm/gemm_util.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
include_directories(BEFORE
${PROJECT_SOURCE_DIR}/profiler/include
${PROJECT_SOURCE_DIR}/test/include
${PROJECT_SOURCE_DIR}/external/include/half
)
add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp) add_test_executable(test_gemm_reduce_fp16 gemm_reduce_fp16.cpp)
target_link_libraries(test_gemm_reduce_fp16 PRIVATE host_tensor) target_link_libraries(test_gemm_reduce_fp16 PRIVATE host_tensor)
target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance) target_link_libraries(test_gemm_reduce_fp16 PRIVATE device_gemm_reduce_instance)
#include <iostream> #include <iostream>
#include "profile_gemm_reduce_impl.hpp" #include "profiler/include/profile_gemm_reduce_impl.hpp"
int main() int main()
{ {
......
#include <iostream> #include <iostream>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include "config.hpp" #include "ck/ck.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_xdl_splitk.hpp"
#include "host_tensor_generator.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_tensor.hpp"
#include "host_gemm.hpp" #include "ck/library/utility/check_err.hpp"
#include "tensor_layout.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_gemm_xdl_splitk.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "ck/library/host_tensor/host_gemm.hpp"
enum struct GemmMatrixLayout enum struct GemmMatrixLayout
{ {
......
...@@ -2,21 +2,18 @@ ...@@ -2,21 +2,18 @@
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/device_grouped_gemm_xdl.hpp"
#include "print.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device.hpp"
#include "host_tensor.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "host_gemm.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "device_grouped_gemm_xdl.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
using PassThrough = ck::tensor_operation::element_wise::PassThrough; using PassThrough = ck::tensor_operation::element_wise::PassThrough;
......
...@@ -2,16 +2,13 @@ ...@@ -2,16 +2,13 @@
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/utility/magic_division.hpp"
#include "check_err.hpp" #include "ck/library/utility/check_err.hpp"
#include "config.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "magic_division.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
__global__ void gpu_magic_number_division(uint32_t magic_multiplier, __global__ void gpu_magic_number_division(uint32_t magic_multiplier,
uint32_t magic_shift, uint32_t magic_shift,
......
#include "getopt.h" #include <getopt.h>
#include "host_common_util.hpp" #include "ck/library/host_tensor/host_common_util.hpp"
#include "profile_reduce_impl.hpp" #include "profiler/include/profile_reduce_impl.hpp"
using namespace ck; using namespace ck;
......
#include "getopt.h" #include <getopt.h>
#include "host_common_util.hpp" #include "ck/library/host_tensor/host_common_util.hpp"
#include "profile_reduce_impl.hpp" #include "profiler/include/profile_reduce_impl.hpp"
using namespace ck; using namespace ck;
......
#include <cmath> #include <cmath>
#include <cstdlib> #include <cstdlib>
#include <half.hpp>
#include <numeric> #include <numeric>
#include <type_traits> #include <type_traits>
#include <vector> #include <vector>
#include "gtest/gtest.h" #include <gtest/gtest.h>
#include "check_err.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "conv_util.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "element_wise_operation.hpp"
#include "fill.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_tensor.hpp" #include "ck/library/utility/conv_util.hpp"
#include "reference_conv_fwd.hpp" #include "ck/library/utility/fill.hpp"
#include "tensor_layout.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
namespace { namespace {
using InElementOp = ck::tensor_operation::element_wise::PassThrough; using InElementOp = ck::tensor_operation::element_wise::PassThrough;
......
#include <vector> #include <vector>
#include <iostream> #include <iostream>
#include "gtest/gtest.h" #include <gtest/gtest.h>
#include "config.hpp" #include "ck/ck.hpp"
#include "host_tensor.hpp" #include "ck/utility/number.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/device_softmax.hpp"
#include "number.hpp"
#include "reference_softmax.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_softmax.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "ck/library/host_tensor/device_memory.hpp"
#include "ck/library/reference_tensor_operation/cpu/reference_softmax.hpp"
namespace ck { namespace ck {
......
...@@ -3,7 +3,9 @@ ...@@ -3,7 +3,9 @@
#include <numeric> #include <numeric>
#include <cassert> #include <cassert>
#include "tensor_space_filling_curve.hpp" #include "ck/ck.hpp"
#include "ck/utility/common_header.hpp"
#include "ck/tensor_description/tensor_space_filling_curve.hpp"
using namespace ck; using namespace ck;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment