#ifndef CK_CONFIG_NVIDIA_HPP
#define CK_CONFIG_NVIDIA_HPP

// NOTE: the original include targets are missing from this copy of the file;
// the headers below are an assumption (CUDA runtime, half-precision type, and
// fixed-width integers for the index_t alias defined at the bottom).
#include <cuda_runtime.h>
#include <cuda_fp16.h>
#include <cstdint>

// index type: unsigned or signed
#define CK_UNSIGNED_INDEX_TYPE 0

// device backend
#define CK_DEVICE_BACKEND_NVIDIA 1

// disable AMD inline asm and intrinsics
#define CK_USE_AMD_INLINE_ASM 0
#define CK_THREADWISE_GEMM_USE_AMD_INLINE_ASM 0
#define CK_USE_AMD_BUFFER_ADDRESSING 0
#define CK_USE_AMD_BUFFER_ADDRESSING_INTRINSIC 0
#define CK_USE_AMD_XDLOPS 0
#define CK_USE_AMD_XDLOPS_INLINE_ASM 0
#define CK_USE_AMD_XDLOPS_EMULATE 0

// experimental implementations
#define CK_EXPERIMENTAL_BLOCKWISE_GEMM_USE_PIPELINE 0
#define CK_EXPERIMENTAL_TENSOR_COORDINATE_USE_CALCULATE_OFFSET_DIFF 0
#define CK_EXPERIMENTAL_THREADWISE_COPY_V4R2_USE_OPTIMIZED_ADDRESS_CACLULATION 0
#define CK_EXPERIMENTAL_USE_MORE_COMPILE_STATIC_BLOCKWISE_GENERIC_SLICE_COPY_V1 0
#define CK_EXPERIMENTAL_USE_MORE_COMPILE_STATIC_THREADWISE_GENERIC_TENSOR_SLICE_COPY_V1R2 0
#define CK_EXPERIMENTAL_USE_MORE_COMPILE_STATIC_THREADWISE_GENERIC_TENSOR_SLICE_COPY_V2R1 0

namespace ck {

// memory space a pointer may refer to
enum AddressSpace
{
    Generic,
    Global,
    Lds,
    Vgpr
};

// how a computed value is committed to memory
enum InMemoryDataOperation
{
    Set,
    AtomicAdd
};

// index type used for tensor coordinates, offsets, and lengths
#if CK_UNSIGNED_INDEX_TYPE
using index_t = uint32_t;
#else
using index_t = int32_t;
#endif

} // namespace ck

#endif // CK_CONFIG_NVIDIA_HPP
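
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only; the kernel and include path below are
// hypothetical, not part of this header). It shows the intended consumption
// pattern: translation units include this config before other CK headers and
// use ck::index_t for coordinate arithmetic. With CK_UNSIGNED_INDEX_TYPE set
// to 0 above, index_t resolves to int32_t on this backend.
//
//   #include "config_nvidia.hpp" // hypothetical path to this header
//
//   __global__ void scale(float* p, ck::index_t n, float alpha)
//   {
//       const ck::index_t i = blockIdx.x * blockDim.x + threadIdx.x;
//       if(i < n)
//           p[i] *= alpha;
//   }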