Unverified Commit bf0c825d authored by zhangyue's avatar zhangyue Committed by GitHub
Browse files

issue/995 fix paged attn on iluvatar

parent 70862bcc
...@@ -30,7 +30,11 @@ __device__ __forceinline__ float warpReduceMax(float x) { ...@@ -30,7 +30,11 @@ __device__ __forceinline__ float warpReduceMax(float x) {
} }
__device__ __forceinline__ unsigned int cvtaToShared(const void *ptr) { __device__ __forceinline__ unsigned int cvtaToShared(const void *ptr) {
#if defined(ENABLE_ILUVATAR_API)
return static_cast<unsigned int>(reinterpret_cast<uintptr_t>(ptr));
#else
return static_cast<unsigned int>(__cvta_generic_to_shared(ptr)); return static_cast<unsigned int>(__cvta_generic_to_shared(ptr));
#endif
} }
__device__ __forceinline__ void cpAsyncCaSharedGlobal16(void *dst_shared, const void *src_global) { __device__ __forceinline__ void cpAsyncCaSharedGlobal16(void *dst_shared, const void *src_global) {
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include "../../handle.h" #include "../../handle.h"
#include "infiniop/ops/paged_attention.h" #include "infiniop/ops/paged_attention.h"
#ifdef ENABLE_NVIDIA_API #if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/paged_attention_nvidia.cuh" #include "nvidia/paged_attention_nvidia.cuh"
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
...@@ -36,6 +36,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionDescriptor( ...@@ -36,6 +36,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionDescriptor(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax) CREATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -57,6 +60,9 @@ __C infiniStatus_t infiniopGetPagedAttentionWorkspaceSize( ...@@ -57,6 +60,9 @@ __C infiniStatus_t infiniopGetPagedAttentionWorkspaceSize(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, metax) GET(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
GET(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -82,6 +88,9 @@ __C infiniStatus_t infiniopPagedAttention( ...@@ -82,6 +88,9 @@ __C infiniStatus_t infiniopPagedAttention(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax) CALCULATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -102,6 +111,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionDescriptor( ...@@ -102,6 +111,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionDescriptor(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
DESTROY(INFINI_DEVICE_METAX, metax) DESTROY(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
DESTROY(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include "../../handle.h" #include "../../handle.h"
#include "infiniop/ops/paged_attention_prefill.h" #include "infiniop/ops/paged_attention_prefill.h"
#ifdef ENABLE_NVIDIA_API #if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/paged_attention_prefill_nvidia.cuh" #include "nvidia/paged_attention_prefill_nvidia.cuh"
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
...@@ -38,6 +38,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionPrefillDescriptor( ...@@ -38,6 +38,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionPrefillDescriptor(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax) CREATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -59,6 +62,9 @@ __C infiniStatus_t infiniopGetPagedAttentionPrefillWorkspaceSize( ...@@ -59,6 +62,9 @@ __C infiniStatus_t infiniopGetPagedAttentionPrefillWorkspaceSize(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, metax) GET(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
GET(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -87,6 +93,9 @@ __C infiniStatus_t infiniopPagedAttentionPrefill( ...@@ -87,6 +93,9 @@ __C infiniStatus_t infiniopPagedAttentionPrefill(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax) CALCULATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -107,6 +116,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionPrefillDescriptor( ...@@ -107,6 +116,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionPrefillDescriptor(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
DESTROY(INFINI_DEVICE_METAX, metax) DESTROY(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
DESTROY(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
...@@ -2,7 +2,7 @@ ...@@ -2,7 +2,7 @@
#include "../../handle.h" #include "../../handle.h"
#include "infiniop/ops/paged_caching.h" #include "infiniop/ops/paged_caching.h"
#ifdef ENABLE_NVIDIA_API #if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/paged_caching_nvidia.cuh" #include "nvidia/paged_caching_nvidia.cuh"
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
...@@ -31,6 +31,9 @@ __C infiniStatus_t infiniopCreatePagedCachingDescriptor( ...@@ -31,6 +31,9 @@ __C infiniStatus_t infiniopCreatePagedCachingDescriptor(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CREATE(INFINI_DEVICE_METAX, metax) CREATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -52,6 +55,9 @@ __C infiniStatus_t infiniopGetPagedCachingWorkspaceSize( ...@@ -52,6 +55,9 @@ __C infiniStatus_t infiniopGetPagedCachingWorkspaceSize(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
GET(INFINI_DEVICE_METAX, metax) GET(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
GET(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -77,6 +83,9 @@ __C infiniStatus_t infiniopPagedCaching( ...@@ -77,6 +83,9 @@ __C infiniStatus_t infiniopPagedCaching(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
CALCULATE(INFINI_DEVICE_METAX, metax) CALCULATE(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
...@@ -97,6 +106,9 @@ __C infiniStatus_t infiniopDestroyPagedCachingDescriptor( ...@@ -97,6 +106,9 @@ __C infiniStatus_t infiniopDestroyPagedCachingDescriptor(
#endif #endif
#ifdef ENABLE_METAX_API #ifdef ENABLE_METAX_API
DESTROY(INFINI_DEVICE_METAX, metax) DESTROY(INFINI_DEVICE_METAX, metax)
#endif
#ifdef ENABLE_ILUVATAR_API
DESTROY(INFINI_DEVICE_ILUVATAR, nvidia)
#endif #endif
default: default:
return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED; return INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED;
......
...@@ -42,14 +42,14 @@ target("infiniop-iluvatar") ...@@ -42,14 +42,14 @@ target("infiniop-iluvatar")
add_links("cudart", "cublas", "cudnn") add_links("cudart", "cublas", "cudnn")
set_warnings("all", "error") set_warnings("all", "error")
add_cuflags("-Wno-error=unused-private-field") add_cuflags("-Wno-error=unused-private-field", "-Wno-error=unused-variable", "-Wno-unused-variable")
add_cuflags("-fPIC", "-x", "ivcore", "-std=c++17", {force = true}) add_cuflags("-fPIC", "-x", "ivcore", "-std=c++17", {force = true})
if has_config("ivcore-20") then if has_config("ivcore-20") then
add_cuflags("--cuda-gpu-arch=ivcore20", {force = true}) add_cuflags("--cuda-gpu-arch=ivcore20", {force = true})
end end
add_culdflags("-fPIC") add_culdflags("-fPIC")
add_cxflags("-fPIC") add_cxflags("-fPIC", "-Wno-error=unused-variable", "-Wno-unused-variable")
add_cxxflags("-fPIC") add_cxxflags("-fPIC", "-Wno-error=unused-variable", "-Wno-unused-variable")
-- set_languages("cxx17") 天数似乎不能用这个配置 -- set_languages("cxx17") 天数似乎不能用这个配置
add_files("../src/infiniop/devices/nvidia/*.cu", "../src/infiniop/ops/*/nvidia/*.cu") add_files("../src/infiniop/devices/nvidia/*.cu", "../src/infiniop/ops/*/nvidia/*.cu")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment