Unverified Commit d1db6a0c authored by Chao Liu's avatar Chao Liu Committed by GitHub
Browse files

Absolute include path (#281)

* ad gelu and fast_gelu

* added GeLU and fast GeLU

* clean up

* add gemm+fastgelu example

* add gemm+gelu instances

* update profiler

* clean up

* clean up

* adding gemm+bias+activation

* clean

* adding bias

* clean

* adding gemm multiple d

* debugging

* add gemm bias add fastgelu

* rename, clean

* refactoring; add readme

* refactor

* refactor

* refactor

* refactor

* refactor

* refactor

* fix

* fix

* update example

* update example

* rename

* update example

* add ckProfiler

* clean

* clean

* clean

* clean

* add client app example

* update readme

* delete obselete files

* remove old client app

* delete old file

* cleaning

* clean

* remove half

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path for all examples

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* fix header path

* revert client app example

* clean build

* fix build

* temporary disable client test on Jenkins

* clean

* clean

* clean
parent a49115b9
#include <stdlib.h>
#include "config.hpp"
#include "device_grouped_gemm_xdl.hpp"
#include "element_wise_operation.hpp"
#include "device_operation_instance.hpp"
#include <cstdlib>
#include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "ck/tensor_operation/gpu/device/device_grouped_gemm_xdl.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_blockwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_blockwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_blockwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_blockwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_blockwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp"
namespace ck {
namespace tensor_operation {
......@@ -24,5 +24,4 @@ ADD_BLOCKWISE_INST_BY_ID(float, double, float, 7, 0, 0, 2, 1);
} // namespace device_reduce_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
#include "device_reduce_instance_blockwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_blockwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_blockwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_blockwise.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_multiblock_atomic_add.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp"
namespace ck {
namespace tensor_operation {
......@@ -20,5 +20,4 @@ ADD_MULTIBLOCK_ATOMIC_ADD_INST_BY_ID(bhalf_t, float, float, 5, 0, 0, 2, 1);
} // namespace device_reduce_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
#include "device_reduce_instance_multiblock_atomic_add.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_multiblock_atomic_add.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp"
namespace ck {
namespace tensor_operation {
......@@ -20,5 +20,4 @@ ADD_MULTIBLOCK_ATOMIC_ADD_INST_BY_ID(float, float, float, 5, 0, 0, 2, 1);
} // namespace device_reduce_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
#include "device_reduce_instance_multiblock_atomic_add.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp"
namespace ck {
namespace tensor_operation {
......@@ -20,5 +20,4 @@ ADD_MULTIBLOCK_ATOMIC_ADD_INST_BY_ID(float, double, float, 5, 0, 0, 2, 1);
} // namespace device_reduce_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
#include "device_reduce_instance_multiblock_atomic_add.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_multiblock_atomic_add.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_threadwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_threadwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_threadwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp"
namespace ck {
namespace tensor_operation {
......@@ -24,5 +24,4 @@ ADD_THREADWISE_INST_BY_ID(half_t, float, half_t, 7, 0, 0, 2, 1);
} // namespace device_reduce_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
#include "device_reduce_instance_threadwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_threadwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp"
namespace ck {
namespace tensor_operation {
......
#include "device_reduce_instance_threadwise.hpp"
#include "ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise.hpp"
namespace ck {
namespace tensor_operation {
......@@ -48,5 +48,4 @@ ADD_THREADWISE_INST_BY_ID(double, double, double, 4, 0, 1, 2, 1);
} // namespace device_reduce_instance
} // namespace device
} // namespace tensor_operation
} // namespace ck
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment