Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
bf0c825d
Unverified
Commit
bf0c825d
authored
Jan 29, 2026
by
zhangyue
Committed by
GitHub
Jan 29, 2026
Browse files
issue/995 fix paged attn on iluvatar
parent
70862bcc
Changes
5
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
46 additions
and
6 deletions
+46
-6
src/infiniop/ops/paged_attention/cuda/kernel_v2.cuh
src/infiniop/ops/paged_attention/cuda/kernel_v2.cuh
+4
-0
src/infiniop/ops/paged_attention/operator.cc
src/infiniop/ops/paged_attention/operator.cc
+13
-1
src/infiniop/ops/paged_attention_prefill/operator.cc
src/infiniop/ops/paged_attention_prefill/operator.cc
+13
-1
src/infiniop/ops/paged_caching/operator.cc
src/infiniop/ops/paged_caching/operator.cc
+13
-1
xmake/iluvatar.lua
xmake/iluvatar.lua
+3
-3
No files found.
src/infiniop/ops/paged_attention/cuda/kernel_v2.cuh
View file @
bf0c825d
...
...
@@ -30,7 +30,11 @@ __device__ __forceinline__ float warpReduceMax(float x) {
}
/**
 * Narrow a pointer to an `unsigned int` address value.
 *
 * Default builds pass `ptr` through `__cvta_generic_to_shared` and narrow the
 * result. When ENABLE_ILUVATAR_API is defined that call is skipped and the
 * pointer's raw integer value is narrowed instead.
 *
 * NOTE(review): the Iluvatar path assumes the meaningful address bits fit in
 * `unsigned int` and that the raw value is usable wherever the converted
 * address is expected; confirm against the Iluvatar toolchain documentation.
 *
 * @param ptr Pointer to convert (presumably into shared memory; verify at call sites).
 * @return The address narrowed to `unsigned int`.
 */
__device__ __forceinline__ unsigned int cvtaToShared(const void *ptr) {
#if !defined(ENABLE_ILUVATAR_API)
    // Default path: use the address-space conversion intrinsic.
    const auto shared_addr = __cvta_generic_to_shared(ptr);
    return static_cast<unsigned int>(shared_addr);
#else
    // Iluvatar path: no intrinsic call, reinterpret the pointer bits directly.
    const uintptr_t raw_addr = reinterpret_cast<uintptr_t>(ptr);
    return static_cast<unsigned int>(raw_addr);
#endif
}
__device__
__forceinline__
void
cpAsyncCaSharedGlobal16
(
void
*
dst_shared
,
const
void
*
src_global
)
{
...
...
src/infiniop/ops/paged_attention/operator.cc
View file @
bf0c825d
...
...
@@ -2,7 +2,7 @@
#include "../../handle.h"
#include "infiniop/ops/paged_attention.h"
-#ifdef ENABLE_NVIDIA_API
+#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/paged_attention_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -36,6 +36,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionDescriptor(
#endif
#ifdef ENABLE_METAX_API
CREATE
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -57,6 +60,9 @@ __C infiniStatus_t infiniopGetPagedAttentionWorkspaceSize(
#endif
#ifdef ENABLE_METAX_API
GET
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
GET
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -82,6 +88,9 @@ __C infiniStatus_t infiniopPagedAttention(
#endif
#ifdef ENABLE_METAX_API
CALCULATE
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -102,6 +111,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionDescriptor(
#endif
#ifdef ENABLE_METAX_API
DESTROY
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
DESTROY
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
src/infiniop/ops/paged_attention_prefill/operator.cc
View file @
bf0c825d
...
...
@@ -2,7 +2,7 @@
#include "../../handle.h"
#include "infiniop/ops/paged_attention_prefill.h"
-#ifdef ENABLE_NVIDIA_API
+#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/paged_attention_prefill_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -38,6 +38,9 @@ __C infiniStatus_t infiniopCreatePagedAttentionPrefillDescriptor(
#endif
#ifdef ENABLE_METAX_API
CREATE
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -59,6 +62,9 @@ __C infiniStatus_t infiniopGetPagedAttentionPrefillWorkspaceSize(
#endif
#ifdef ENABLE_METAX_API
GET
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
GET
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -87,6 +93,9 @@ __C infiniStatus_t infiniopPagedAttentionPrefill(
#endif
#ifdef ENABLE_METAX_API
CALCULATE
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -107,6 +116,9 @@ __C infiniStatus_t infiniopDestroyPagedAttentionPrefillDescriptor(
#endif
#ifdef ENABLE_METAX_API
DESTROY
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
DESTROY
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
src/infiniop/ops/paged_caching/operator.cc
View file @
bf0c825d
...
...
@@ -2,7 +2,7 @@
#include "../../handle.h"
#include "infiniop/ops/paged_caching.h"
-#ifdef ENABLE_NVIDIA_API
+#if defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API)
#include "nvidia/paged_caching_nvidia.cuh"
#endif
#ifdef ENABLE_METAX_API
...
...
@@ -31,6 +31,9 @@ __C infiniStatus_t infiniopCreatePagedCachingDescriptor(
#endif
#ifdef ENABLE_METAX_API
CREATE
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
CREATE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -52,6 +55,9 @@ __C infiniStatus_t infiniopGetPagedCachingWorkspaceSize(
#endif
#ifdef ENABLE_METAX_API
GET
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
GET
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -77,6 +83,9 @@ __C infiniStatus_t infiniopPagedCaching(
#endif
#ifdef ENABLE_METAX_API
CALCULATE
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
CALCULATE
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
@@ -97,6 +106,9 @@ __C infiniStatus_t infiniopDestroyPagedCachingDescriptor(
#endif
#ifdef ENABLE_METAX_API
DESTROY
(
INFINI_DEVICE_METAX
,
metax
)
#endif
#ifdef ENABLE_ILUVATAR_API
DESTROY
(
INFINI_DEVICE_ILUVATAR
,
nvidia
)
#endif
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
xmake/iluvatar.lua
View file @
bf0c825d
...
...
@@ -42,14 +42,14 @@ target("infiniop-iluvatar")
add_links
(
"cudart"
,
"cublas"
,
"cudnn"
)
set_warnings
(
"all"
,
"error"
)
add_cuflags
(
"-Wno-error=unused-private-field"
)
add_cuflags
(
"-Wno-error=unused-private-field"
,
"-Wno-error=unused-variable"
,
"-Wno-unused-variable"
)
add_cuflags
(
"-fPIC"
,
"-x"
,
"ivcore"
,
"-std=c++17"
,
{
force
=
true
})
if
has_config
(
"ivcore-20"
)
then
add_cuflags
(
"--cuda-gpu-arch=ivcore20"
,
{
force
=
true
})
end
add_culdflags
(
"-fPIC"
)
add_cxflags
(
"-fPIC"
)
add_cxxflags
(
"-fPIC"
)
add_cxflags
(
"-fPIC"
,
"-Wno-error=unused-variable"
,
"-Wno-unused-variable"
)
add_cxxflags
(
"-fPIC"
,
"-Wno-error=unused-variable"
,
"-Wno-unused-variable"
)
-- set_languages("cxx17") -- the Iluvatar (天数) toolchain does not seem to accept this setting
add_files
(
"../src/infiniop/devices/nvidia/*.cu"
,
"../src/infiniop/ops/*/nvidia/*.cu"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment