Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
d7965f91
Commit
d7965f91
authored
Jul 18, 2025
by
wooway777
Browse files
issue/21 - Initial Modularization
parent
f59c7bf5
Changes
8
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
599 additions
and
245 deletions
+599
-245
src/models/cache_manager.hpp
src/models/cache_manager.hpp
+295
-0
src/models/inference_context.cpp
src/models/inference_context.cpp
+188
-0
src/models/inference_context.hpp
src/models/inference_context.hpp
+39
-0
src/models/jiuge/jiuge.cpp
src/models/jiuge/jiuge.cpp
+73
-244
src/tensor.hpp
src/tensor.hpp
+1
-0
src/tensor/strorage.cpp
src/tensor/strorage.cpp
+1
-1
src/tensor/tensor.cpp
src/tensor/tensor.cpp
+1
-0
xmake.lua
xmake.lua
+1
-0
No files found.
src/models/cache_manager.hpp
0 → 100644
View file @
d7965f91
#ifndef CACHE_MANAGER_HPP
#define CACHE_MANAGER_HPP

#include <cstddef>
#include <cstdint>
#include <cstring>
#include <functional>
#include <memory>
#include <stdexcept>
#include <type_traits>
#include <unordered_map>
#include <vector>

#include "../tensor.hpp"
#include "../utils.hpp"

#include "infinicore_infer.h"
// Folds `value` into `seed` using the boost::hash_combine mixing scheme.
// The magic constant is the golden ratio in 32-bit fixed point; the shifts
// spread entropy across high and low bits so nearby values don't collide.
inline void hash_combine(size_t &seed, size_t value) {
    const size_t golden_ratio = 0x9e3779b9;
    seed ^= value + golden_ratio + (seed << 6) + (seed >> 2);
}
// Overload for enum types (selected via SFINAE): hashes the enumerator's
// integral value by forwarding to the size_t overload.
template <typename EnumT>
inline void hash_combine(size_t &seed, EnumT value,
                         typename std::enable_if<std::is_enum<EnumT>::value>::type * = nullptr) {
    hash_combine(seed, static_cast<size_t>(value));
}
// Overload for float: hashes the raw IEEE-754 bit pattern (copied via memcpy,
// the well-defined way to type-pun) so hashing is deterministic and avoids
// float-comparison precision pitfalls.
inline void hash_combine(size_t &seed, float value) {
    static_assert(sizeof(float) == sizeof(uint32_t), "Size mismatch");
    uint32_t bits;
    std::memcpy(&bits, &value, sizeof(bits));
    hash_combine(seed, static_cast<size_t>(bits));
}
// Folds a tensor descriptor's dtype, every shape extent, and every stride
// into a single hash, so two descriptors hash equal only when their full
// layout matches.
inline size_t computeTensorDescHash(std::shared_ptr<TensorDesc> desc) {
    size_t hash = 0;
    hash_combine(hash, desc->dtype());
    for (auto extent : desc->shape()) {
        hash_combine(hash, extent);
    }
    for (auto stride : desc->strides()) {
        // Strides may be signed; reinterpret as size_t for mixing.
        hash_combine(hash, static_cast<size_t>(stride));
    }
    return hash;
}
enum
class
OperatorType
{
RMS_NORM
,
GEMM
,
ROPE
,
REARRANGE
,
CAUSAL_SOFTMAX
,
SWIGLU
,
RANDOM_SAMPLE
};
// Fixed-capacity LRU cache mapping a hash key to an infiniop descriptor
// handle. The cache OWNS its descriptors: on eviction, overwrite, or
// destruction it calls the infiniopDestroy*Descriptor function selected by
// `opType`. Recency is tracked with an intrusive doubly-linked list between
// two sentinel nodes (`head` = most recent side, `tail` = least recent side);
// the unordered_map gives O(1) key lookup into that list.
//
// Not copyable (copy ops are deleted below) — copying would double-destroy
// the raw descriptor handles.
template <typename DescriptorType>
class LRUDescriptorCache {
private:
    // Intrusive list node holding one cached descriptor.
    struct CacheNode {
        size_t key;           // hash key this entry is stored under
        DescriptorType desc;  // owned infiniop descriptor handle
        CacheNode *prev;
        CacheNode *next;
        CacheNode() : key(0), desc(), prev(nullptr), next(nullptr) {}
        CacheNode(size_t k, const DescriptorType &d) : key(k), desc(d), prev(nullptr), next(nullptr) {}
    };

    std::unordered_map<size_t, CacheNode *> cache;  // key -> list node
    CacheNode *head;  // sentinel; head->next is the most recently used entry
    CacheNode *tail;  // sentinel; tail->prev is the least recently used entry
    const size_t capacity;
    size_t size;
    const OperatorType opType;  // selects which infiniop destroy call to use

    // Releases the underlying infiniop descriptor for this cache's operator
    // type. Called on eviction, overwrite, and destruction.
    void destroyDescriptor(DescriptorType &desc) {
        switch (opType) {
        case OperatorType::RMS_NORM:
            infiniopDestroyRMSNormDescriptor(desc);
            break;
        case OperatorType::GEMM:
            infiniopDestroyGemmDescriptor(desc);
            break;
        case OperatorType::ROPE:
            infiniopDestroyRoPEDescriptor(desc);
            break;
        case OperatorType::REARRANGE:
            infiniopDestroyRearrangeDescriptor(desc);
            break;
        case OperatorType::CAUSAL_SOFTMAX:
            infiniopDestroyCausalSoftmaxDescriptor(desc);
            break;
        case OperatorType::SWIGLU:
            infiniopDestroySwiGLUDescriptor(desc);
            break;
        case OperatorType::RANDOM_SAMPLE:
            infiniopDestroyRandomSampleDescriptor(desc);
            break;
        default:
            throw std::runtime_error("Unknown descriptor type");
        }
    }

    // Unlinks `node` from the list, destroys its descriptor, erases its map
    // entry, and frees the node. `node` must be a real entry, not a sentinel.
    void removeNode(CacheNode *node) {
        node->prev->next = node->next;
        node->next->prev = node->prev;
        destroyDescriptor(node->desc);
        cache.erase(node->key);
        delete node;
        --size;
    }

    // Links a NEW node right after `head` (most-recent position), registers
    // it in the map, and evicts the LRU entry if capacity is exceeded.
    // NOTE(review): with capacity == 0 this (and put()) would call removeNode
    // on a sentinel and crash — assumes capacity >= 1; confirm at call sites.
    void addToTop(CacheNode *node) {
        node->next = head->next;
        node->next->prev = node;
        node->prev = head;
        head->next = node;
        cache[node->key] = node;
        if (++size > capacity) {
            removeNode(tail->prev);
        }
    }

    // Moves an EXISTING node to the most-recent position (unlink + relink).
    void moveToTop(CacheNode *node) {
        node->prev->next = node->next;
        node->next->prev = node->prev;
        node->next = head->next;
        node->next->prev = node;
        node->prev = head;
        head->next = node;
    }

public:
    // Builds an empty cache for operator type `t` holding at most `c` entries.
    LRUDescriptorCache(size_t c, OperatorType t) : capacity(c), size(0), opType(t) {
        head = new CacheNode();
        tail = new CacheNode();
        head->next = tail;
        tail->prev = head;
    }

    // Destroys every cached descriptor, then the sentinels.
    ~LRUDescriptorCache() {
        while (head->next != tail) {
            removeNode(head->next);
        }
        delete head;
        delete tail;
    }

    // Looks up `key`; on a hit, marks the entry most-recently-used, copies
    // the descriptor handle into `out_desc`, and returns true.
    bool get(size_t key, DescriptorType &out_desc) {
        auto it = cache.find(key);
        if (it == cache.end()) {
            return false;
        }
        CacheNode *node = it->second;
        moveToTop(node);
        out_desc = node->desc;
        return true;
    }

    // Inserts `descriptor` under `key`. An existing entry's descriptor is
    // destroyed and replaced in place; otherwise the LRU entry is evicted
    // first when the cache is full.
    void put(size_t key, const DescriptorType &descriptor) {
        auto it = cache.find(key);
        if (it != cache.end()) {
            // Key already exists: destroy the old descriptor and update in place.
            CacheNode *node = it->second;
            destroyDescriptor(node->desc);
            node->desc = descriptor;
            moveToTop(node);
            return;
        }
        // Evict the least-recently-used entry before inserting, if full.
        if (size >= capacity) {
            removeNode(tail->prev);
        }
        // Create the new node and place it at the most-recent position.
        CacheNode *node = new CacheNode(key, descriptor);
        addToTop(node);
    }

    // Non-copyable: nodes hold owning raw descriptor handles.
    LRUDescriptorCache(const LRUDescriptorCache &) = delete;
    LRUDescriptorCache &operator=(const LRUDescriptorCache &) = delete;
};
// Owns one LRU descriptor cache per operator type and exposes typed get/put
// wrappers, plus the static hashing helper used to build cache keys.
class CacheManager {
private:
    // Single source of truth for the per-operator cache capacity. Fix: the
    // old code declared a (non-static, unused) member constant and then
    // repeated the literal 100 as the constructor default; making it
    // static constexpr lets it serve as that default argument.
    static constexpr size_t DEFAULT_CACHE_CAPACITY = 100;

    // One cache per operator kind; each owns its descriptors and releases
    // them with the matching infiniopDestroy*Descriptor on eviction.
    LRUDescriptorCache<infiniopRMSNormDescriptor_t> rms_norm_cache;
    LRUDescriptorCache<infiniopGemmDescriptor_t> gemm_cache;
    LRUDescriptorCache<infiniopRoPEDescriptor_t> rope_cache;
    LRUDescriptorCache<infiniopRearrangeDescriptor_t> rearrange_cache;
    LRUDescriptorCache<infiniopCausalSoftmaxDescriptor_t> causal_softmax_cache;
    LRUDescriptorCache<infiniopSwiGLUDescriptor_t> swiglu_cache;
    LRUDescriptorCache<infiniopRandomSampleDescriptor_t> random_sample_cache;

public:
    // `capacity` is the maximum entry count of EACH per-operator cache.
    // explicit: prevents accidental implicit conversion from an integer.
    explicit CacheManager(size_t capacity = DEFAULT_CACHE_CAPACITY)
        : rms_norm_cache(capacity, OperatorType::RMS_NORM),
          gemm_cache(capacity, OperatorType::GEMM),
          rope_cache(capacity, OperatorType::ROPE),
          rearrange_cache(capacity, OperatorType::REARRANGE),
          causal_softmax_cache(capacity, OperatorType::CAUSAL_SOFTMAX),
          swiglu_cache(capacity, OperatorType::SWIGLU),
          random_sample_cache(capacity, OperatorType::RANDOM_SAMPLE) {}

    // RMSNorm operations
    bool getRMSNormDescriptor(size_t key, infiniopRMSNormDescriptor_t &desc) {
        return rms_norm_cache.get(key, desc);
    }
    void putRMSNormDescriptor(size_t key, const infiniopRMSNormDescriptor_t &desc) {
        rms_norm_cache.put(key, desc);
    }

    // GEMM operations
    bool getGemmDescriptor(size_t key, infiniopGemmDescriptor_t &desc) {
        return gemm_cache.get(key, desc);
    }
    void putGemmDescriptor(size_t key, const infiniopGemmDescriptor_t &desc) {
        gemm_cache.put(key, desc);
    }

    // RoPE operations
    bool getRoPEDescriptor(size_t key, infiniopRoPEDescriptor_t &desc) {
        return rope_cache.get(key, desc);
    }
    void putRoPEDescriptor(size_t key, const infiniopRoPEDescriptor_t &desc) {
        rope_cache.put(key, desc);
    }

    // Rearrange operations
    bool getRearrangeDescriptor(size_t key, infiniopRearrangeDescriptor_t &desc) {
        return rearrange_cache.get(key, desc);
    }
    void putRearrangeDescriptor(size_t key, const infiniopRearrangeDescriptor_t &desc) {
        rearrange_cache.put(key, desc);
    }

    // Causal softmax operations
    bool getCausalSoftmaxDescriptor(size_t key, infiniopCausalSoftmaxDescriptor_t &desc) {
        return causal_softmax_cache.get(key, desc);
    }
    void putCausalSoftmaxDescriptor(size_t key, const infiniopCausalSoftmaxDescriptor_t &desc) {
        causal_softmax_cache.put(key, desc);
    }

    // SwiGLU operations
    bool getSwiGLUDescriptor(size_t key, infiniopSwiGLUDescriptor_t &desc) {
        return swiglu_cache.get(key, desc);
    }
    void putSwiGLUDescriptor(size_t key, const infiniopSwiGLUDescriptor_t &desc) {
        swiglu_cache.put(key, desc);
    }

    // Random Sample operations
    bool getRandomSampleDescriptor(size_t key, infiniopRandomSampleDescriptor_t &desc) {
        return random_sample_cache.get(key, desc);
    }
    void putRandomSampleDescriptor(size_t key, const infiniopRandomSampleDescriptor_t &desc) {
        random_sample_cache.put(key, desc);
    }

    // Combines the hashes of up to five tensor descriptors into one cache
    // key; null entries are skipped.
    // NOTE(review): because nulls are skipped, the key is insensitive to
    // WHICH slot a descriptor occupies (e.g. (A, null) == (null, A)); all
    // current callers pass trailing nulls only, so this does not collide in
    // practice — confirm if new call patterns are added.
    static size_t createDescriptorKey(std::shared_ptr<TensorDesc> desc0,
                                      std::shared_ptr<TensorDesc> desc1,
                                      std::shared_ptr<TensorDesc> desc2,
                                      std::shared_ptr<TensorDesc> desc3,
                                      std::shared_ptr<TensorDesc> desc4) {
        size_t seed = 0;
        if (desc0) {
            hash_combine(seed, computeTensorDescHash(desc0));
        }
        if (desc1) {
            hash_combine(seed, computeTensorDescHash(desc1));
        }
        if (desc2) {
            hash_combine(seed, computeTensorDescHash(desc2));
        }
        if (desc3) {
            hash_combine(seed, computeTensorDescHash(desc3));
        }
        if (desc4) {
            hash_combine(seed, computeTensorDescHash(desc4));
        }
        return seed;
    }
};
#endif // CACHE_MANAGER_HPP
src/models/inference_context.cpp
0 → 100644
View file @
d7965f91
#include "inference_context.hpp"
#include "../tensor.hpp"
#include "../utils.hpp"
// Binds the context to its (non-owning) device resources, descriptor cache,
// and the stream every op will be enqueued on.
InferenceContext::InferenceContext(DeviceResource *rsrc_,
                                   CacheManager *cache_manager_,
                                   infinirtStream_t stream_)
    : rsrc(rsrc_),
      cache_manager(cache_manager_),
      stream(stream_) {}
// Grows (never shrinks) the shared scratch workspace to at least
// `required_size` bytes.
//
// Fix: also allocates when workspace_storage is still null, so it is
// guaranteed non-null afterwards. Previously, if the very first op reported a
// required workspace of 0 bytes, no allocation happened and the caller's
// unconditional `workspace_storage->memory()` dereferenced a null shared_ptr.
void InferenceContext::ensure_workspace(size_t required_size) {
    if (required_size > current_workspace_size || !workspace_storage) {
        // Allocate at least one byte so a zero-byte request still yields a
        // valid Storage object.
        const size_t alloc_size = required_size > 0 ? required_size : 1;
        workspace_storage = Storage::createFromPool(alloc_size, rsrc->memory_pool);
        current_workspace_size = alloc_size;
    }
}
void
InferenceContext
::
rmsnorm
(
std
::
shared_ptr
<
Tensor
>
y
,
std
::
shared_ptr
<
Tensor
>
x
,
std
::
shared_ptr
<
Tensor
>
w
,
float
epsilon
)
{
size_t
key
=
CacheManager
::
createDescriptorKey
(
y
->
tdesc
(),
x
->
tdesc
(),
w
->
tdesc
(),
nullptr
,
nullptr
);
infiniopRMSNormDescriptor_t
desc
;
if
(
!
cache_manager
->
getRMSNormDescriptor
(
key
,
desc
))
{
RUN_INFINI
(
infiniopCreateRMSNormDescriptor
(
rsrc
->
handle
,
&
desc
,
y
->
desc
(),
x
->
desc
(),
w
->
desc
(),
epsilon
));
cache_manager
->
putRMSNormDescriptor
(
key
,
desc
);
}
size_t
workspace_size
=
0
;
RUN_INFINI
(
infiniopGetRMSNormWorkspaceSize
(
desc
,
&
workspace_size
));
ensure_workspace
(
workspace_size
);
void
*
workspace
=
workspace_storage
->
memory
();
RUN_INFINI
(
infiniopRMSNorm
(
desc
,
workspace
,
workspace_size
,
y
->
data
(),
x
->
data
(),
w
->
data
(),
stream
));
}
void
InferenceContext
::
gemm
(
std
::
shared_ptr
<
Tensor
>
c
,
std
::
shared_ptr
<
TensorDesc
>
c_desc_overwrite
,
std
::
shared_ptr
<
Tensor
>
a
,
std
::
shared_ptr
<
TensorDesc
>
a_desc_overwrite
,
std
::
shared_ptr
<
Tensor
>
b
,
std
::
shared_ptr
<
TensorDesc
>
b_desc_overwrite
,
float
alpha
,
float
beta
)
{
size_t
key
=
CacheManager
::
createDescriptorKey
(
c_desc_overwrite
?
c_desc_overwrite
:
c
->
tdesc
(),
a_desc_overwrite
?
a_desc_overwrite
:
a
->
tdesc
(),
b_desc_overwrite
?
b_desc_overwrite
:
b
->
tdesc
(),
nullptr
,
nullptr
);
infiniopGemmDescriptor_t
desc
;
if
(
!
cache_manager
->
getGemmDescriptor
(
key
,
desc
))
{
RUN_INFINI
(
infiniopCreateGemmDescriptor
(
rsrc
->
handle
,
&
desc
,
c_desc_overwrite
?
c_desc_overwrite
->
desc
()
:
c
->
desc
(),
a_desc_overwrite
?
a_desc_overwrite
->
desc
()
:
a
->
desc
(),
b_desc_overwrite
?
b_desc_overwrite
->
desc
()
:
b
->
desc
()));
cache_manager
->
putGemmDescriptor
(
key
,
desc
);
}
size_t
workspace_size
=
0
;
RUN_INFINI
(
infiniopGetGemmWorkspaceSize
(
desc
,
&
workspace_size
));
ensure_workspace
(
workspace_size
);
void
*
workspace
=
workspace_storage
->
memory
();
RUN_INFINI
(
infiniopGemm
(
desc
,
workspace
,
workspace_size
,
c
->
data
(),
a
->
data
(),
b
->
data
(),
alpha
,
beta
,
stream
));
}
void
InferenceContext
::
rearrange
(
std
::
shared_ptr
<
Tensor
>
dst
,
std
::
shared_ptr
<
TensorDesc
>
dst_desc_overwrite
,
std
::
shared_ptr
<
Tensor
>
src
,
std
::
shared_ptr
<
TensorDesc
>
src_desc_overwrite
)
{
size_t
key
=
CacheManager
::
createDescriptorKey
(
dst_desc_overwrite
?
dst_desc_overwrite
:
dst
->
tdesc
(),
src_desc_overwrite
?
src_desc_overwrite
:
src
->
tdesc
(),
nullptr
,
nullptr
,
nullptr
);
infiniopRearrangeDescriptor_t
desc
;
if
(
!
cache_manager
->
getRearrangeDescriptor
(
key
,
desc
))
{
RUN_INFINI
(
infiniopCreateRearrangeDescriptor
(
rsrc
->
handle
,
&
desc
,
dst_desc_overwrite
?
dst_desc_overwrite
->
desc
()
:
dst
->
desc
(),
src_desc_overwrite
?
src_desc_overwrite
->
desc
()
:
src
->
desc
()));
cache_manager
->
putRearrangeDescriptor
(
key
,
desc
);
}
RUN_INFINI
(
infiniopRearrange
(
desc
,
dst
->
data
(),
src
->
data
(),
stream
));
}
void
InferenceContext
::
rope
(
std
::
shared_ptr
<
Tensor
>
q
,
std
::
shared_ptr
<
Tensor
>
k
,
std
::
shared_ptr
<
Tensor
>
pos
,
std
::
shared_ptr
<
Tensor
>
sin
,
std
::
shared_ptr
<
Tensor
>
cos
)
{
size_t
key
=
CacheManager
::
createDescriptorKey
(
q
->
tdesc
(),
k
->
tdesc
(),
pos
->
tdesc
(),
sin
->
tdesc
(),
cos
->
tdesc
());
infiniopRoPEDescriptor_t
desc
;
if
(
!
cache_manager
->
getRoPEDescriptor
(
key
,
desc
))
{
RUN_INFINI
(
infiniopCreateRoPEDescriptor
(
rsrc
->
handle
,
&
desc
,
q
->
desc
(),
k
->
desc
(),
pos
->
desc
(),
sin
->
desc
(),
cos
->
desc
()));
cache_manager
->
putRoPEDescriptor
(
key
,
desc
);
}
size_t
workspace_size
=
0
;
RUN_INFINI
(
infiniopGetRoPEWorkspaceSize
(
desc
,
&
workspace_size
));
ensure_workspace
(
workspace_size
);
void
*
workspace
=
workspace_storage
->
memory
();
RUN_INFINI
(
infiniopRoPE
(
desc
,
workspace
,
workspace_size
,
q
->
data
(),
k
->
data
(),
pos
->
data
(),
sin
->
data
(),
cos
->
data
(),
stream
));
}
void
InferenceContext
::
causalSoftmax
(
std
::
shared_ptr
<
Tensor
>
y
,
std
::
shared_ptr
<
TensorDesc
>
y_desc_overwrite
,
std
::
shared_ptr
<
Tensor
>
x
,
std
::
shared_ptr
<
TensorDesc
>
x_desc_overwrite
)
{
size_t
key
=
CacheManager
::
createDescriptorKey
(
y_desc_overwrite
?
y_desc_overwrite
:
y
->
tdesc
(),
x_desc_overwrite
?
x_desc_overwrite
:
x
->
tdesc
(),
nullptr
,
nullptr
,
nullptr
);
infiniopCausalSoftmaxDescriptor_t
desc
;
if
(
!
cache_manager
->
getCausalSoftmaxDescriptor
(
key
,
desc
))
{
RUN_INFINI
(
infiniopCreateCausalSoftmaxDescriptor
(
rsrc
->
handle
,
&
desc
,
y_desc_overwrite
?
y_desc_overwrite
->
desc
()
:
y
->
desc
(),
x_desc_overwrite
?
x_desc_overwrite
->
desc
()
:
x
->
desc
()));
cache_manager
->
putCausalSoftmaxDescriptor
(
key
,
desc
);
}
size_t
workspace_size
=
0
;
RUN_INFINI
(
infiniopGetCausalSoftmaxWorkspaceSize
(
desc
,
&
workspace_size
));
ensure_workspace
(
workspace_size
);
void
*
workspace
=
workspace_storage
->
memory
();
RUN_INFINI
(
infiniopCausalSoftmax
(
desc
,
workspace
,
workspace_size
,
y
->
data
(),
x
->
data
(),
stream
));
}
void
InferenceContext
::
swiglu
(
std
::
shared_ptr
<
Tensor
>
out
,
std
::
shared_ptr
<
Tensor
>
up
,
std
::
shared_ptr
<
Tensor
>
gate
)
{
size_t
key
=
CacheManager
::
createDescriptorKey
(
out
->
tdesc
(),
up
->
tdesc
(),
gate
->
tdesc
(),
nullptr
,
nullptr
);
infiniopSwiGLUDescriptor_t
desc
;
if
(
!
cache_manager
->
getSwiGLUDescriptor
(
key
,
desc
))
{
RUN_INFINI
(
infiniopCreateSwiGLUDescriptor
(
rsrc
->
handle
,
&
desc
,
out
->
desc
(),
up
->
desc
(),
gate
->
desc
()));
cache_manager
->
putSwiGLUDescriptor
(
key
,
desc
);
}
size_t
workspace_size
=
0
;
RUN_INFINI
(
infiniopGetSwiGLUWorkspaceSize
(
desc
,
&
workspace_size
));
ensure_workspace
(
workspace_size
);
void
*
workspace
=
workspace_storage
->
memory
();
RUN_INFINI
(
infiniopSwiGLU
(
desc
,
workspace
,
workspace_size
,
out
->
data
(),
up
->
data
(),
gate
->
data
(),
stream
));
}
void
InferenceContext
::
randomSample
(
std
::
shared_ptr
<
Tensor
>
out
,
std
::
shared_ptr
<
TensorDesc
>
out_desc_overwrite
,
std
::
shared_ptr
<
Tensor
>
prob
,
std
::
shared_ptr
<
TensorDesc
>
prob_desc_overwrite
,
float
random_val
,
float
top_p
,
uint32_t
top_k
,
float
temperature
)
{
size_t
key
=
CacheManager
::
createDescriptorKey
(
out_desc_overwrite
?
out_desc_overwrite
:
out
->
tdesc
(),
prob_desc_overwrite
?
prob_desc_overwrite
:
prob
->
tdesc
(),
nullptr
,
nullptr
,
nullptr
);
infiniopRandomSampleDescriptor_t
desc
;
if
(
!
cache_manager
->
getRandomSampleDescriptor
(
key
,
desc
))
{
RUN_INFINI
(
infiniopCreateRandomSampleDescriptor
(
rsrc
->
handle
,
&
desc
,
out_desc_overwrite
?
out_desc_overwrite
->
desc
()
:
out
->
desc
(),
prob_desc_overwrite
?
prob_desc_overwrite
->
desc
()
:
prob
->
desc
()));
cache_manager
->
putRandomSampleDescriptor
(
key
,
desc
);
}
size_t
workspace_size
=
0
;
RUN_INFINI
(
infiniopGetRandomSampleWorkspaceSize
(
desc
,
&
workspace_size
));
ensure_workspace
(
workspace_size
);
void
*
workspace
=
workspace_storage
->
memory
();
RUN_INFINI
(
infiniopRandomSample
(
desc
,
workspace
,
workspace_size
,
out
->
data
(),
prob
->
data
(),
random_val
,
top_p
,
top_k
,
temperature
,
stream
));
}
src/models/inference_context.hpp
0 → 100644
View file @
d7965f91
// inference_context.hpp
#pragma once
#include "cache_manager.hpp"
#include "jiuge/jiuge_impl.hpp"
#include "jiuge/jiuge_weight.hpp"
// Per-inference execution context: bundles the device resources, the
// descriptor cache, and the stream used by one inference run, plus a lazily
// grown scratch workspace shared by the ops below. Definitions live in
// inference_context.cpp.
struct InferenceContext {
    DeviceResource *rsrc;         // non-owning: device handle + memory pool
    CacheManager *cache_manager;  // non-owning: per-operator descriptor caches
    infinirtStream_t stream;      // stream all ops are enqueued on
    // Grow-only scratch buffer reused across ops.
    std::shared_ptr<Storage> workspace_storage;
    // Bytes currently held by workspace_storage.
    size_t current_workspace_size = 0;

    InferenceContext(DeviceResource *rsrc, CacheManager *cache_manager, infinirtStream_t stream);

    // Grows workspace_storage to at least `required_size` bytes (never shrinks).
    void ensure_workspace(size_t required_size);

    // RMS normalization of `x` with weight `w`, written to `y`.
    void rmsnorm(std::shared_ptr<Tensor> y, std::shared_ptr<Tensor> x,
                 std::shared_ptr<Tensor> w, float epsilon);

    // c = alpha * a @ b + beta * c. Each *_desc_overwrite, when non-null,
    // replaces the corresponding tensor's own descriptor.
    void gemm(std::shared_ptr<Tensor> c, std::shared_ptr<TensorDesc> c_desc_overwrite,
              std::shared_ptr<Tensor> a, std::shared_ptr<TensorDesc> a_desc_overwrite,
              std::shared_ptr<Tensor> b, std::shared_ptr<TensorDesc> b_desc_overwrite,
              float alpha, float beta);

    // Layout-changing copy from `src` to `dst`.
    void rearrange(std::shared_ptr<Tensor> dst, std::shared_ptr<TensorDesc> dst_desc_overwrite,
                   std::shared_ptr<Tensor> src, std::shared_ptr<TensorDesc> src_desc_overwrite);

    // Rotary position embedding of q/k using positions and sin/cos tables.
    void rope(std::shared_ptr<Tensor> q, std::shared_ptr<Tensor> k,
              std::shared_ptr<Tensor> pos, std::shared_ptr<Tensor> sin,
              std::shared_ptr<Tensor> cos);

    // Causally-masked softmax of `x`, written to `y`.
    void causalSoftmax(std::shared_ptr<Tensor> y, std::shared_ptr<TensorDesc> y_desc_overwrite,
                       std::shared_ptr<Tensor> x, std::shared_ptr<TensorDesc> x_desc_overwrite);

    // SwiGLU activation of `up`/`gate`, written to `out`.
    void swiglu(std::shared_ptr<Tensor> out, std::shared_ptr<Tensor> up,
                std::shared_ptr<Tensor> gate);

    // Token sampling from `prob` into `out`; sampling parameters are applied
    // at execute time.
    void randomSample(std::shared_ptr<Tensor> out, std::shared_ptr<TensorDesc> out_desc_overwrite,
                      std::shared_ptr<Tensor> prob, std::shared_ptr<TensorDesc> prob_desc_overwrite,
                      float random_val, float top_p, uint32_t top_k, float temperature);
};
src/models/jiuge/jiuge.cpp
View file @
d7965f91
This diff is collapsed.
Click to expand it.
src/tensor.hpp
View file @
d7965f91
...
...
@@ -120,6 +120,7 @@ public:
infiniDtype_t
dtype
()
const
;
bool
isContigous
()
const
;
infiniopTensorDescriptor_t
desc
()
const
;
std
::
shared_ptr
<
TensorDesc
>
tdesc
()
const
;
ptrdiff_t
dataOffset
()
const
;
infiniDevice_t
deviceType
()
const
;
int
deviceId
()
const
;
...
...
src/tensor/strorage.cpp
View file @
d7965f91
src/tensor/tensor.cpp
View file @
d7965f91
...
...
@@ -108,6 +108,7 @@ ptrdiff_t Tensor::dataOffset() const {
}
// Returns the raw infiniop tensor-descriptor handle backing this tensor
// (delegates to the owned TensorDesc wrapper).
infiniopTensorDescriptor_t Tensor::desc() const { return _desc->desc(); }
// Returns the shared TensorDesc wrapper itself (used e.g. to build
// descriptor-cache keys without touching the raw handle).
std::shared_ptr<TensorDesc> Tensor::tdesc() const { return _desc; }
std
::
shared_ptr
<
Tensor
>
Tensor
::
buffer
(
infiniDtype_t
dtype
,
const
std
::
vector
<
size_t
>
&
shape
,
...
...
xmake.lua
View file @
d7965f91
...
...
@@ -12,6 +12,7 @@ target("infinicore_infer")
set_languages
(
"cxx17"
)
set_warnings
(
"all"
,
"error"
)
add_files
(
"src/models/*.cpp"
)
add_files
(
"src/models/*/*.cpp"
)
add_files
(
"src/tensor/*.cpp"
)
add_files
(
"src/allocator/*.cpp"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment