Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
36890e40
Commit
36890e40
authored
Sep 16, 2025
by
PanZezhong1725
Browse files
fix: use new rope
parent
a179dcc3
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
9 additions
and
39 deletions
+9
-39
src/cache_manager/opcache_manager.hpp
src/cache_manager/opcache_manager.hpp
+0
-2
src/models/inference_context.cpp
src/models/inference_context.cpp
+4
-28
src/models/inference_context.hpp
src/models/inference_context.hpp
+4
-8
src/utils.hpp
src/utils.hpp
+1
-1
No files found.
src/cache_manager/opcache_manager.hpp
View file @
36890e40
...
...
@@ -156,7 +156,6 @@ public:
DECLARE_OP_CACHE
(
RMSNorm
)
DECLARE_OP_CACHE
(
Gemm
)
DECLARE_OP_CACHE
(
RoPE
)
DECLARE_OP_CACHE
(
RoPEv2
)
DECLARE_OP_CACHE
(
Rearrange
)
DECLARE_OP_CACHE
(
CausalSoftmax
)
DECLARE_OP_CACHE
(
Topkrouter
)
...
...
@@ -169,7 +168,6 @@ public:
RMSNorm_cache
(
capacity
,
DESTROY_FUNC
(
RMSNorm
)),
Gemm_cache
(
capacity
,
DESTROY_FUNC
(
Gemm
)),
RoPE_cache
(
capacity
,
DESTROY_FUNC
(
RoPE
)),
RoPEv2_cache
(
capacity
,
DESTROY_FUNC
(
RoPEv2
)),
Rearrange_cache
(
capacity
,
DESTROY_FUNC
(
Rearrange
)),
CausalSoftmax_cache
(
capacity
,
DESTROY_FUNC
(
CausalSoftmax
)),
Topkrouter_cache
(
capacity
,
DESTROY_FUNC
(
Topkrouter
)),
...
...
src/models/inference_context.cpp
View file @
36890e40
...
...
@@ -99,14 +99,16 @@ void InferenceContext::rope(std::shared_ptr<Tensor> q,
std
::
shared_ptr
<
Tensor
>
k
,
std
::
shared_ptr
<
Tensor
>
pos
,
std
::
shared_ptr
<
Tensor
>
sin
,
std
::
shared_ptr
<
Tensor
>
cos
)
{
std
::
shared_ptr
<
Tensor
>
cos
,
infiniopRoPEAlgo_t
algo
)
{
size_t
key
=
CacheManager
::
createDescriptorKey
(
q
,
k
,
pos
,
sin
,
cos
);
hash_combine
(
key
,
std
::
hash
<
int
>
()(
algo
));
infiniopRoPEDescriptor_t
desc
;
if
(
!
cache_manager
->
getRoPEDescriptor
(
key
,
desc
))
{
RUN_INFINI
(
infiniopCreateRoPEDescriptor
(
op_handle
,
&
desc
,
q
->
desc
(),
k
->
desc
(),
pos
->
desc
(),
sin
->
desc
(),
cos
->
desc
()));
pos
->
desc
(),
sin
->
desc
(),
cos
->
desc
()
,
algo
));
cache_manager
->
putRoPEDescriptor
(
key
,
desc
);
}
...
...
@@ -121,32 +123,6 @@ void InferenceContext::rope(std::shared_ptr<Tensor> q,
sin
->
data
(),
cos
->
data
(),
stream
));
}
void
InferenceContext
::
rope_v2
(
std
::
shared_ptr
<
Tensor
>
q
,
std
::
shared_ptr
<
Tensor
>
k
,
std
::
shared_ptr
<
Tensor
>
pos
,
std
::
shared_ptr
<
Tensor
>
sin
,
std
::
shared_ptr
<
Tensor
>
cos
)
{
size_t
key
=
CacheManager
::
createDescriptorKey
(
q
,
k
,
pos
,
sin
,
cos
);
infiniopRoPEv2Descriptor_t
desc
;
if
(
!
cache_manager
->
getRoPEv2Descriptor
(
key
,
desc
))
{
RUN_INFINI
(
infiniopCreateRoPEv2Descriptor
(
op_handle
,
&
desc
,
q
->
desc
(),
k
->
desc
(),
pos
->
desc
(),
sin
->
desc
(),
cos
->
desc
()));
cache_manager
->
putRoPEv2Descriptor
(
key
,
desc
);
}
size_t
workspace_size
=
0
;
RUN_INFINI
(
infiniopGetRoPEv2WorkspaceSize
(
desc
,
&
workspace_size
));
ensure_workspace
(
workspace_size
);
void
*
workspace
=
workspace_storage
->
memory
();
RUN_INFINI
(
infiniopRoPEv2
(
desc
,
workspace
,
workspace_size
,
q
->
data
(),
k
->
data
(),
pos
->
data
(),
sin
->
data
(),
cos
->
data
(),
stream
));
}
void
InferenceContext
::
causalSoftmax
(
std
::
shared_ptr
<
Tensor
>
y
,
std
::
shared_ptr
<
Tensor
>
x
)
{
size_t
key
=
CacheManager
::
createDescriptorKey
(
y
,
x
);
...
...
src/models/inference_context.hpp
View file @
36890e40
...
...
@@ -33,12 +33,8 @@ struct InferenceContext {
std
::
shared_ptr
<
Tensor
>
k
,
std
::
shared_ptr
<
Tensor
>
pos
,
std
::
shared_ptr
<
Tensor
>
sin
,
std
::
shared_ptr
<
Tensor
>
cos
);
void
rope_v2
(
std
::
shared_ptr
<
Tensor
>
q
,
std
::
shared_ptr
<
Tensor
>
k
,
std
::
shared_ptr
<
Tensor
>
pos
,
std
::
shared_ptr
<
Tensor
>
sin
,
std
::
shared_ptr
<
Tensor
>
cos
);
std
::
shared_ptr
<
Tensor
>
cos
,
infiniopRoPEAlgo_t
algo
);
void
causalSoftmax
(
std
::
shared_ptr
<
Tensor
>
y
,
std
::
shared_ptr
<
Tensor
>
x
);
...
...
@@ -102,13 +98,13 @@ inline void rearrange(std::shared_ptr<Tensor> dst, std::shared_ptr<Tensor> src)
inline
void
rope
(
std
::
shared_ptr
<
Tensor
>
q
,
std
::
shared_ptr
<
Tensor
>
k
,
std
::
shared_ptr
<
Tensor
>
pos
,
std
::
shared_ptr
<
Tensor
>
sin
,
std
::
shared_ptr
<
Tensor
>
cos
)
{
getInferenceContext
().
rope
(
q
,
k
,
pos
,
sin
,
cos
);
getInferenceContext
().
rope
(
q
,
k
,
pos
,
sin
,
cos
,
INFINIOP_ROPE_ALGO_GPT_J
);
}
inline
void
rope_v2
(
std
::
shared_ptr
<
Tensor
>
q
,
std
::
shared_ptr
<
Tensor
>
k
,
std
::
shared_ptr
<
Tensor
>
pos
,
std
::
shared_ptr
<
Tensor
>
sin
,
std
::
shared_ptr
<
Tensor
>
cos
)
{
getInferenceContext
().
rope
_v2
(
q
,
k
,
pos
,
sin
,
cos
);
getInferenceContext
().
rope
(
q
,
k
,
pos
,
sin
,
cos
,
INFINIOP_ROPE_ALGO_GPT_NEOX
);
}
inline
void
causalSoftmax
(
std
::
shared_ptr
<
Tensor
>
y
,
std
::
shared_ptr
<
Tensor
>
x
)
{
...
...
src/utils.hpp
View file @
36890e40
#ifndef INFINICORE_INFER_UTILS_H
#define INFINICORE_INFER_UTILS_H
#include <infini
core
.h>
#include <infini
rt
.h>
#include <cstring>
#include <iostream>
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment