Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinicore
Commits
8d09630a
Unverified
Commit
8d09630a
authored
Feb 11, 2026
by
gongchensu
Committed by
GitHub
Feb 11, 2026
Browse files
Merge branch 'demo131' into Issue/862
parents
ab52dead
012df56c
Changes
387
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
508 additions
and
15 deletions
+508
-15
python/infinicore/nn/functional/linear_w8a8i8.py
python/infinicore/nn/functional/linear_w8a8i8.py
+31
-0
python/infinicore/nn/functional/silu_and_mul.py
python/infinicore/nn/functional/silu_and_mul.py
+17
-0
python/infinicore/ops/add_rms_norm.py
python/infinicore/ops/add_rms_norm.py
+34
-0
python/infinicore/ops/kv_caching.py
python/infinicore/ops/kv_caching.py
+13
-0
python/infinicore/ops/paged_attention.py
python/infinicore/ops/paged_attention.py
+40
-0
python/infinicore/ops/paged_attention_prefill.py
python/infinicore/ops/paged_attention_prefill.py
+45
-0
python/infinicore/ops/paged_caching.py
python/infinicore/ops/paged_caching.py
+21
-0
python/infinicore/tensor.py
python/infinicore/tensor.py
+5
-0
scripts/build_ntops.py
scripts/build_ntops.py
+25
-8
src/infiniccl-test/main.cpp
src/infiniccl-test/main.cpp
+2
-1
src/infiniccl/cambricon/infiniccl_cambricon.cc
src/infiniccl/cambricon/infiniccl_cambricon.cc
+1
-1
src/infiniccl/cuda/infiniccl_cuda.h
src/infiniccl/cuda/infiniccl_cuda.h
+1
-1
src/infiniccl/infiniccl.cc
src/infiniccl/infiniccl.cc
+3
-0
src/infinicore-test/README.md
src/infinicore-test/README.md
+1
-0
src/infinicore-test/main.cc
src/infinicore-test/main.cc
+3
-0
src/infinicore/context/allocators/device_pinned_allocator.cc
src/infinicore/context/allocators/device_pinned_allocator.cc
+6
-0
src/infinicore/context/allocators/host_allocator.cc
src/infinicore/context/allocators/host_allocator.cc
+6
-0
src/infinicore/context/allocators/pinnable_block_allocator.cc
...infinicore/context/allocators/pinnable_block_allocator.cc
+190
-0
src/infinicore/context/allocators/pinnable_block_allocator.hpp
...nfinicore/context/allocators/pinnable_block_allocator.hpp
+54
-0
src/infinicore/context/allocators/stream_ordered_allocator.cc
...infinicore/context/allocators/stream_ordered_allocator.cc
+10
-4
No files found.
python/infinicore/nn/functional/linear_w8a8i8.py
0 → 100644
View file @
8d09630a
from
infinicore.lib
import
_infinicore
from
infinicore.tensor
import
Tensor
def linear_w8a8i8(
    input: Tensor,
    weight_packed: Tensor,
    weight_scale: Tensor,
    bias=None,
    out=None,
) -> Tensor:
    r"""Run the W8A8 int8 linear operator.

    As described by the operator itself: a linear layer whose weight is
    quantized to int8 and whose input is quantized to int8 with a per-tensor
    scale.

    Args:
        input: Input tensor.
        weight_packed: Packed (quantized) weight tensor.
        weight_scale: Scale tensor that accompanies ``weight_packed``.
        bias: Optional bias tensor; when ``None``, ``None`` is forwarded to
            the backend.
        out: Optional destination tensor. When given, the in-place backend
            entry point ``linear_w8a8i8_`` is used and ``out`` is returned.

    Returns:
        ``out`` if it was supplied, otherwise a new ``Tensor`` wrapping the
        backend result.
    """

    def _operands():
        # Evaluated lazily so the attribute accesses happen in the same order
        # as the positional arguments of the backend call.
        return (
            input._underlying,
            weight_packed._underlying,
            weight_scale._underlying,
            bias._underlying if bias is not None else None,
        )

    if out is not None:
        _infinicore.linear_w8a8i8_(out._underlying, *_operands())
        return out

    return Tensor(_infinicore.linear_w8a8i8(*_operands()))
python/infinicore/nn/functional/silu_and_mul.py
0 → 100644
View file @
8d09630a
from
infinicore.lib
import
_infinicore
from
infinicore.tensor
import
Tensor
def silu_and_mul(input: Tensor, out=None) -> Tensor:
    r"""SiLU-and-multiply (SwiGLU) activation.

    Formula: ``output = SiLU(input_gate) * input_up``.
    Input shape is ``[..., 2*d]`` and output shape is ``[..., d]``.

    Args:
        input: Tensor holding the gate and up halves.
        out: Optional destination tensor. When given, the in-place backend
            entry point ``silu_and_mul_`` writes into it.

    Returns:
        ``out`` if it was supplied, otherwise a new ``Tensor`` wrapping the
        backend result.
    """
    if out is not None:
        _infinicore.silu_and_mul_(out._underlying, input._underlying)
        return out

    result = _infinicore.silu_and_mul(input._underlying)
    return Tensor(result)
python/infinicore/ops/add_rms_norm.py
0 → 100644
View file @
8d09630a
import
infinicore.tensor
as
tensor
from
infinicore.lib
import
_infinicore
def add_rms_norm(a, b, weight, epsilon=1e-5, *, out=None, residual=None):
    """
    Fused Add and RMS Normalization.

    Args:
        a: First input tensor
        b: Second input tensor
        weight: Scale weights
        epsilon: Small constant for numerical stability, default is 1e-5
        out: Optional tensor that receives the normalized result. When omitted,
            an empty tensor with the shape, dtype and device of ``a`` is created.
        residual: Optional tensor that receives the add result. When omitted,
            an empty tensor with the shape, dtype and device of ``b`` is created.

    Returns:
        Tuple of (normalized_result, add_result): (RMSNorm(a + b) * weight, a + b)
        The add_result can be used as residual for subsequent layers.
    """
    normalized = out
    if normalized is None:
        normalized = tensor.empty(a.shape, dtype=a.dtype, device=a.device)

    summed = residual
    if summed is None:
        summed = tensor.empty(b.shape, dtype=b.dtype, device=b.device)

    # The backend writes both outputs in place.
    _infinicore.add_rms_norm_(
        normalized._underlying,
        summed._underlying,
        a._underlying,
        b._underlying,
        weight._underlying,
        epsilon,
    )

    return normalized, summed
python/infinicore/ops/kv_caching.py
0 → 100644
View file @
8d09630a
from
infinicore.lib
import
_infinicore
def kv_caching(k_cache, v_cache, k, v, past_kv_lengths):
    """Invoke the in-place ``kv_caching_`` backend op and return the caches.

    The underlying handles of the five tensors are forwarded positionally in
    the order ``k_cache, v_cache, k, v, past_kv_lengths``.
    NOTE(review): presumably the backend writes ``k``/``v`` into the caches at
    offsets derived from ``past_kv_lengths`` -- confirm against the backend.

    Returns:
        The tuple ``(k_cache, v_cache)``: the same objects that were passed in.
    """
    operands = (k_cache, v_cache, k, v, past_kv_lengths)
    _infinicore.kv_caching_(*(t._underlying for t in operands))
    return k_cache, v_cache
python/infinicore/ops/paged_attention.py
0 → 100644
View file @
8d09630a
from
infinicore.lib
import
_infinicore
from
infinicore.tensor
import
Tensor
def paged_attention(
    q: Tensor,
    k_cache: Tensor,
    v_cache: Tensor,
    block_tables: Tensor,
    cache_lens: Tensor,
    alibi_slopes: Tensor | None = None,
    scale: float = 1.0,
    *,
    out: Tensor | None = None,
):
    """Call the paged-attention backend op.

    Args:
        q: Query tensor.
        k_cache: Key cache tensor.
        v_cache: Value cache tensor.
        block_tables: Block table tensor.
        cache_lens: Cache length tensor.
        alibi_slopes: Optional ALiBi slopes tensor; ``None`` is forwarded to
            the backend unchanged.
        scale: Scalar forwarded to the backend (default ``1.0``).
        out: Optional destination tensor (keyword-only). When given, the
            in-place entry point ``paged_attention_`` is used.

    Returns:
        ``out`` if it was supplied, otherwise a new ``Tensor`` wrapping the
        backend result.
    """

    def _operands():
        # Built lazily so attribute access order matches the positional
        # argument order of the backend call.
        return (
            q._underlying,
            k_cache._underlying,
            v_cache._underlying,
            block_tables._underlying,
            cache_lens._underlying,
            None if alibi_slopes is None else alibi_slopes._underlying,
            scale,
        )

    if out is not None:
        _infinicore.paged_attention_(out._underlying, *_operands())
        return out

    return Tensor(_infinicore.paged_attention(*_operands()))
python/infinicore/ops/paged_attention_prefill.py
0 → 100644
View file @
8d09630a
from
infinicore.lib
import
_infinicore
from
infinicore.tensor
import
Tensor
def paged_attention_prefill(
    q: Tensor,
    k_cache: Tensor,
    v_cache: Tensor,
    block_tables: Tensor,
    history_lens: Tensor,
    cu_seqlens_q: Tensor,
    alibi_slopes: Tensor | None = None,
    scale: float = 1.0,
    *,
    out: Tensor | None = None,
):
    """Call the paged-attention prefill backend op.

    Args:
        q: Query tensor.
        k_cache: Key cache tensor.
        v_cache: Value cache tensor.
        block_tables: Block table tensor.
        history_lens: History length tensor.
        cu_seqlens_q: Tensor forwarded as ``cu_seqlens_q``
            (presumably cumulative query sequence lengths -- confirm).
        alibi_slopes: Optional ALiBi slopes tensor; ``None`` is forwarded to
            the backend unchanged.
        scale: Scalar forwarded to the backend (default ``1.0``).
        out: Optional destination tensor (keyword-only). When given, the
            in-place entry point ``paged_attention_prefill_`` is used.

    Returns:
        ``out`` if it was supplied, otherwise a new ``Tensor`` wrapping the
        backend result.
    """
    slopes = None if alibi_slopes is None else alibi_slopes._underlying
    tensors = (q, k_cache, v_cache, block_tables, history_lens, cu_seqlens_q)

    if out is None:
        return Tensor(
            _infinicore.paged_attention_prefill(
                *(t._underlying for t in tensors),
                slopes,
                scale,
            )
        )

    _infinicore.paged_attention_prefill_(
        out._underlying,
        *(t._underlying for t in tensors),
        slopes,
        scale,
    )
    return out
python/infinicore/ops/paged_caching.py
0 → 100644
View file @
8d09630a
from
infinicore.lib
import
_infinicore
from
infinicore.tensor
import
Tensor
def paged_caching(
    k_cache: Tensor,
    v_cache: Tensor,
    k: Tensor,
    v: Tensor,
    slot_mapping: Tensor,
):
    """Invoke the in-place ``paged_caching_`` backend op and return the caches.

    The underlying handles are forwarded positionally in the order
    ``k_cache, v_cache, k, v, slot_mapping``.
    NOTE(review): presumably the backend writes ``k``/``v`` into the cache
    slots selected by ``slot_mapping`` -- confirm against the backend.

    Returns:
        The tuple ``(k_cache, v_cache)``: the same objects that were passed in.
    """
    # The op works in place; its return value is not needed, so it is no
    # longer wrapped in a throwaway ``Tensor`` (matches ``kv_caching``).
    _infinicore.paged_caching_(
        k_cache._underlying,
        v_cache._underlying,
        k._underlying,
        v._underlying,
        slot_mapping._underlying,
    )
    return (k_cache, v_cache)
python/infinicore/tensor.py
View file @
8d09630a
...
...
@@ -42,6 +42,11 @@ class Tensor:
getattr
(
self
.
_underlying
,
name
)
),
)
else
:
raise
AttributeError
(
"{!r} object has no attribute {!r}"
.
format
(
__name__
,
name
)
)
return
getattr
(
self
,
name
)
@
property
...
...
scripts/build_ntops.py
View file @
8d09630a
import
concurrent.futures
import
importlib
import
pathlib
...
...
@@ -11,16 +12,32 @@ SRC_DIR_PATH = CURRENT_FILE_PATH.parent.parent / "src"
def
_find_and_build_ops
():
ops_path
=
SRC_DIR_PATH
/
"infiniop"
/
"ops"
for
op_dir
in
ops_path
.
iterdir
()
:
ninetoothed_path
=
op_dir
/
"ninetoothed"
with
concurrent
.
futures
.
ProcessPoolExecutor
()
as
executor
:
futures
=
[]
if
ninetoothed_path
.
is_dir
():
module_path
=
ninetoothed_path
/
"build"
relative_path
=
module_path
.
relative_to
(
SRC_DIR_PATH
)
import_name
=
"."
.
join
(
relative_path
.
parts
)
module
=
importlib
.
import_module
(
import_name
)
for
op_dir
in
ops_path
.
iterdir
():
ninetoothed_path
=
op_dir
/
"ninetoothed"
module
.
build
()
if
not
ninetoothed_path
.
is_dir
():
continue
build_file
=
ninetoothed_path
/
"build.py"
if
not
build_file
.
exists
():
continue
futures
.
append
(
executor
.
submit
(
_build
,
ninetoothed_path
))
for
future
in
concurrent
.
futures
.
as_completed
(
futures
):
future
.
result
()
def _build(ninetoothed_path):
    """Import the ``build`` module under ``ninetoothed_path`` and run its ``build()``.

    The module's import name is the path of ``<ninetoothed_path>/build``
    relative to ``SRC_DIR_PATH``, with the path components joined by dots.
    """
    build_parts = (ninetoothed_path / "build").relative_to(SRC_DIR_PATH).parts
    dotted_name = ".".join(build_parts)

    importlib.import_module(dotted_name).build()
if
__name__
==
"__main__"
:
...
...
src/infiniccl-test/main.cpp
View file @
8d09630a
...
...
@@ -12,7 +12,7 @@ void printUsage() {
std
::
cout
<<
"infiniccl-test --<device>"
<<
std
::
endl
<<
std
::
endl
;
std
::
cout
<<
" --<device>"
<<
std
::
endl
;
std
::
cout
<<
" Specify the device type --(nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon)."
<<
std
::
endl
std
::
cout
<<
" Specify the device type --(nvidia|cambricon|ascend|metax|moore|iluvatar|qy|kunlun|hygon
|ali
)."
<<
std
::
endl
<<
std
::
endl
;
std
::
cout
<<
"The program will run tests on all visible devices of the specified device type."
<<
" Use Environmental Variables such as CUDA_VSIBLE_DEVICES to limit visible device IDs."
;
...
...
@@ -46,6 +46,7 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
else
PARSE_DEVICE
(
"--qy"
,
INFINI_DEVICE_QY
)
else
PARSE_DEVICE
(
"--kunlun"
,
INFINI_DEVICE_KUNLUN
)
else
PARSE_DEVICE
(
"--hygon"
,
INFINI_DEVICE_HYGON
)
else
PARSE_DEVICE
(
"--ali"
,
INFINI_DEVICE_ALI
)
else
{
printUsage
();
}
...
...
src/infiniccl/cambricon/infiniccl_cambricon.cc
View file @
8d09630a
...
...
@@ -62,7 +62,7 @@ infiniStatus_t commInitAll(
for
(
int
i
=
0
;
i
<
ndevice
;
i
++
)
{
rank_list
[
i
]
=
i
;
CHECK_INTERNAL
(
cnrtSetDevice
(
device_ids
[
i
]),
CNRT_RET_SUCCESS
);
CHECK_INTERNAL
(
cnrtSetDevice
(
device_ids
[
i
]),
cnrtSuccess
);
}
CHECK_CNCL
(
cnclInitComms
(
cncl_comms
.
data
(),
ndevice
,
...
...
src/infiniccl/cuda/infiniccl_cuda.h
View file @
8d09630a
...
...
@@ -4,7 +4,7 @@
#include "../infiniccl_impl.h"
// Windows does not support CUDA
#if (defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API)) && defined(ENABLE_CCL) && !defined(_WIN32)
#if (defined(ENABLE_NVIDIA_API) || defined(ENABLE_ILUVATAR_API) || defined(ENABLE_QY_API) || defined(ENABLE_HYGON_API)
|| defined(ENABLE_ALI_API)
) && defined(ENABLE_CCL) && !defined(_WIN32)
INFINICCL_DEVICE_API_IMPL
(
cuda
)
#else
INFINICCL_DEVICE_API_NOOP
(
cuda
)
...
...
src/infiniccl/infiniccl.cc
View file @
8d09630a
...
...
@@ -27,6 +27,7 @@ __C infiniStatus_t infinicclCommInitAll(
COMM_INIT_ALL
(
INFINI_DEVICE_METAX
,
metax
);
COMM_INIT_ALL
(
INFINI_DEVICE_MOORE
,
moore
);
COMM_INIT_ALL
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
COMM_INIT_ALL
(
INFINI_DEVICE_ALI
,
cuda
);
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
...
...
@@ -53,6 +54,7 @@ __C infiniStatus_t infinicclCommDestroy(infinicclComm_t comm) {
COMM_DESTROY
(
INFINI_DEVICE_METAX
,
metax
);
COMM_DESTROY
(
INFINI_DEVICE_MOORE
,
moore
);
COMM_DESTROY
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
COMM_DESTROY
(
INFINI_DEVICE_ALI
,
cuda
);
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
}
...
...
@@ -86,6 +88,7 @@ __C infiniStatus_t infinicclAllReduce(
ALL_REDUCE
(
INFINI_DEVICE_METAX
,
metax
);
ALL_REDUCE
(
INFINI_DEVICE_MOORE
,
moore
);
ALL_REDUCE
(
INFINI_DEVICE_KUNLUN
,
kunlun
);
ALL_REDUCE
(
INFINI_DEVICE_ALI
,
cuda
);
default:
return
INFINI_STATUS_DEVICE_TYPE_NOT_SUPPORTED
;
...
...
src/infinicore-test/README.md
View file @
8d09630a
...
...
@@ -66,6 +66,7 @@ xmake build infinicore-test
./infinicore-test
--qy
./infinicore-test
--kunlun
./infinicore-test
--hygon
./infinicore-test
--ali
```
### Customize Test Parameters
...
...
src/infinicore-test/main.cc
View file @
8d09630a
...
...
@@ -42,6 +42,7 @@ void printUsage() {
<<
" qy"
<<
std
::
endl
<<
" kunlun"
<<
std
::
endl
<<
" hygon"
<<
std
::
endl
<<
" ali"
<<
std
::
endl
<<
std
::
endl
<<
"Available tests:"
<<
std
::
endl
<<
" basic - Basic memory allocation and deallocation tests"
<<
std
::
endl
...
...
@@ -84,6 +85,8 @@ ParsedArgs parseArgs(int argc, char *argv[]) {
args
.
device_type
=
INFINI_DEVICE_KUNLUN
;
}
else
if
(
arg
==
"--hygon"
)
{
args
.
device_type
=
INFINI_DEVICE_HYGON
;
}
else
if
(
arg
==
"--ali"
)
{
args
.
device_type
=
INFINI_DEVICE_ALI
;
}
else
if
(
arg
==
"--test"
)
{
if
(
i
+
1
>=
argc
)
{
std
::
cerr
<<
"Error: --test requires a test name"
<<
std
::
endl
;
...
...
src/infinicore/context/allocators/device_pinned_allocator.cc
View file @
8d09630a
...
...
@@ -12,12 +12,18 @@ DevicePinnedHostAllocator::~DevicePinnedHostAllocator() {
}
// Allocates `size` bytes through infinirtMallocHost.
// Returns nullptr for a zero-byte request without calling the runtime.
// Allocation failures are reported through INFINICORE_CHECK_ERROR.
std::byte *DevicePinnedHostAllocator::allocate(size_t size) {
    if (size == 0) {
        return nullptr;
    }

    // Start from a known value so the pointer is never read uninitialized.
    void *ptr = nullptr;
    INFINICORE_CHECK_ERROR(infinirtMallocHost(&ptr, size));
    return static_cast<std::byte *>(ptr);
}
void
DevicePinnedHostAllocator
::
deallocate
(
std
::
byte
*
ptr
)
{
if
(
ptr
==
nullptr
)
{
return
;
}
if
(
owner_
==
context
::
getDevice
())
{
INFINICORE_CHECK_ERROR
(
infinirtFreeHost
(
ptr
));
gc
();
...
...
src/infinicore/context/allocators/host_allocator.cc
View file @
8d09630a
...
...
@@ -4,10 +4,16 @@
namespace
infinicore
{
// Allocates `size` bytes with std::malloc.
// A zero-byte request yields nullptr without calling std::malloc.
std::byte *HostAllocator::allocate(size_t size) {
    return size == 0 ? nullptr
                     : static_cast<std::byte *>(std::malloc(size));
}
// Releases memory with std::free; a null pointer is ignored.
void HostAllocator::deallocate(std::byte *ptr) {
    if (ptr != nullptr) {
        std::free(ptr);
    }
}
...
...
src/infinicore/context/allocators/pinnable_block_allocator.cc
0 → 100644
View file @
8d09630a
#include "pinnable_block_allocator.hpp"
#include "../context_impl.hpp"
#include "../../utils.hpp"
#include <algorithm>
#include <infinirt.h>
#include <stdexcept>
namespace
infinicore
{
// ------------------- Helper functions -------------------
// Rounds `size` up to the nearest multiple of `alignment`.
// `alignment` must be non-zero (it is used as a divisor).
inline size_t align_up(size_t size, size_t alignment) {
    const size_t padded = size + alignment - 1;
    return padded - (padded % alignment);
}
// ------------------- Constructor -------------------
// Stores the device and creates the fixed size classes (32 KB ... 256 MB),
// each starting with an empty free list.
PinnableBlockAllocator::PinnableBlockAllocator(Device device)
    : device_(device) {
    constexpr size_t kKiB = 1024;
    constexpr size_t kMiB = 1024 * kKiB;

    // Block size of every size class, in ascending order.
    constexpr size_t kClassSizes[] = {
        32 * kKiB,  // 32 KB
        256 * kKiB, // 256 KB
        1 * kMiB,   // 1 MB
        2 * kMiB,   // 2 MB
        4 * kMiB,   // 4 MB
        8 * kMiB,   // 8 MB
        16 * kMiB,  // 16 MB
        32 * kMiB,  // 32 MB
        64 * kMiB,  // 64 MB
        128 * kMiB, // 128 MB
        256 * kMiB, // 256 MB
    };

    size_classes_.clear();
    size_classes_.reserve(sizeof(kClassSizes) / sizeof(kClassSizes[0]));
    for (const size_t bytes : kClassSizes) {
        size_classes_.push_back(SizeClass{bytes, {}});
    }
}
// ------------------- allocate -------------------
std
::
byte
*
PinnableBlockAllocator
::
allocate
(
size_t
size
)
{
if
(
size
==
0
)
{
return
nullptr
;
}
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
// Align size to 256 bytes for GPU
size
=
align_up
(
size
,
256
);
std
::
shared_ptr
<
Block
>
block
;
// 1. Try size-class allocation for small/medium
for
(
auto
&
cls
:
size_classes_
)
{
if
(
size
<=
cls
.
block_size
)
{
if
(
!
cls
.
free_blocks
.
empty
())
{
block
=
cls
.
free_blocks
.
back
();
while
(
block
!=
nullptr
&&
block
->
in_use
)
{
cls
.
free_blocks
.
pop_back
();
if
(
cls
.
free_blocks
.
empty
())
{
block
=
nullptr
;
break
;
}
block
=
cls
.
free_blocks
.
back
();
}
if
(
block
!=
nullptr
)
{
cls
.
free_blocks
.
pop_back
();
block
->
in_use
=
true
;
return
reinterpret_cast
<
std
::
byte
*>
(
block
->
ptr
);
}
}
// Allocate a new block for this class
block
=
std
::
make_shared
<
Block
>
();
block
->
size
=
cls
.
block_size
;
block
->
frozen
=
pinned_mode_
;
block
->
in_use
=
true
;
INFINICORE_CHECK_ERROR
(
infinirtMalloc
(
&
block
->
ptr
,
block
->
size
));
all_blocks_
[
block
->
ptr
]
=
block
;
return
reinterpret_cast
<
std
::
byte
*>
(
block
->
ptr
);
}
}
// 2. Large block allocation
// Try to reuse a frozen or free large block
auto
it
=
std
::
find_if
(
large_blocks_
.
begin
(),
large_blocks_
.
end
(),
[
size
](
const
std
::
shared_ptr
<
Block
>
&
b
)
{
return
b
->
size
>=
size
&&
!
b
->
in_use
;
});
if
(
it
!=
large_blocks_
.
end
())
{
block
=
*
it
;
block
->
in_use
=
true
;
block
->
frozen
=
block
->
frozen
||
pinned_mode_
;
return
reinterpret_cast
<
std
::
byte
*>
(
block
->
ptr
);
}
// Allocate new large block
block
=
std
::
make_shared
<
Block
>
();
block
->
size
=
size
;
block
->
frozen
=
pinned_mode_
;
block
->
in_use
=
true
;
INFINICORE_CHECK_ERROR
(
infinirtMalloc
(
&
block
->
ptr
,
block
->
size
));
large_blocks_
.
push_back
(
block
);
all_blocks_
[
block
->
ptr
]
=
block
;
return
reinterpret_cast
<
std
::
byte
*>
(
block
->
ptr
);
}
// ------------------- deallocate -------------------
void
PinnableBlockAllocator
::
deallocate
(
std
::
byte
*
ptr
)
{
if
(
ptr
==
nullptr
)
{
return
;
}
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
auto
it
=
all_blocks_
.
find
(
reinterpret_cast
<
void
*>
(
ptr
));
if
(
it
==
all_blocks_
.
end
())
{
throw
std
::
runtime_error
(
"Pointer not allocated by this allocator"
);
}
auto
block
=
it
->
second
;
if
(
!
block
->
in_use
)
{
throw
std
::
runtime_error
(
"Double free detected in PinnableBlockAllocator"
);
}
block
->
in_use
=
false
;
if
(
!
block
->
in_use
)
{
for
(
auto
&
cls
:
size_classes_
)
{
if
(
block
->
size
==
cls
.
block_size
)
{
cls
.
free_blocks
.
push_back
(
block
);
break
;
}
}
}
}
// Forces the in_use flag of the block that owns `ptr` and returns that
// block's size in bytes. Free lists are not modified.
//
// Throws std::runtime_error when `ptr` is unknown to this allocator.
size_t PinnableBlockAllocator::mark_in_use_(void *ptr, bool in_use) {
    // Take the lock before touching all_blocks_: allocate(), deallocate() and
    // trim() modify the map under the same mutex, so an unlocked lookup would
    // race with them.
    std::lock_guard<std::mutex> lock(mutex_);

    const auto it = all_blocks_.find(ptr);
    if (it == all_blocks_.end()) {
        throw std::runtime_error("Pointer not allocated by this allocator");
    }

    it->second->in_use = in_use;
    return it->second->size;
}
// ------------------- trim -------------------
void
PinnableBlockAllocator
::
trim
()
{
std
::
lock_guard
<
std
::
mutex
>
lock
(
mutex_
);
// Free non-frozen size-class blocks
for
(
auto
&
cls
:
size_classes_
)
{
for
(
auto
it
=
cls
.
free_blocks
.
begin
();
it
!=
cls
.
free_blocks
.
end
();)
{
if
(
!
(
*
it
)
->
frozen
)
{
INFINICORE_CHECK_ERROR
(
infinirtFree
((
*
it
)
->
ptr
));
all_blocks_
.
erase
((
*
it
)
->
ptr
);
it
=
cls
.
free_blocks
.
erase
(
it
);
}
else
{
++
it
;
}
}
}
// Free non-frozen large blocks
for
(
auto
it
=
large_blocks_
.
begin
();
it
!=
large_blocks_
.
end
();)
{
if
(
!
(
*
it
)
->
frozen
&&
!
(
*
it
)
->
in_use
)
{
INFINICORE_CHECK_ERROR
(
infinirtFree
((
*
it
)
->
ptr
));
all_blocks_
.
erase
((
*
it
)
->
ptr
);
it
=
large_blocks_
.
erase
(
it
);
}
else
{
++
it
;
}
}
}
// ------------------- Destructor -------------------
// Frees every registered block with infinirtFree (the status is ignored, so
// the destructor does not report failures) and empties all bookkeeping.
PinnableBlockAllocator::~PinnableBlockAllocator() {
    std::lock_guard<std::mutex> guard(mutex_);

    for (auto &entry : all_blocks_) {
        void *device_ptr = entry.second->ptr;
        if (device_ptr) {
            infinirtFree(device_ptr);
        }
    }

    all_blocks_.clear();
    large_blocks_.clear();
    for (auto &size_class : size_classes_) {
        size_class.free_blocks.clear();
    }
}
}
// namespace infinicore
src/infinicore/context/allocators/pinnable_block_allocator.hpp
0 → 100644
View file @
8d09630a
#pragma once
#include "memory_allocator.hpp"
#include <mutex>
#include <unordered_map>
#include <vector>
namespace
infinicore
{
// Caching block allocator built on infinirtMalloc/infinirtFree.
//
// Requests are served from fixed size classes or, above the largest class,
// from a list of large blocks. Deallocated blocks are cached for reuse and
// only returned to the runtime by trim() or the destructor. Blocks handed out
// while pin mode is on are marked frozen and are skipped by trim().
// All bookkeeping is guarded by `mutex_`.
class PinnableBlockAllocator : public MemoryAllocator {
    // Represents a single memory block
    struct Block {
        void *ptr = nullptr; // Device pointer
        size_t size = 0;     // Block size in bytes
        bool frozen = false; // True if used in pinned/graph mode
        bool in_use = false; // Whether the block is currently in use
    };

    // A simple size-class allocator for small/medium blocks
    struct SizeClass {
        size_t block_size; // Fixed size for this class
        std::vector<std::shared_ptr<Block>> free_blocks;
    };

public:
    PinnableBlockAllocator(Device device);
    ~PinnableBlockAllocator();

    std::byte *allocate(size_t size) override;
    void deallocate(std::byte *ptr) override;

    // Switch pinned/graph mode.
    // Taken under mutex_ because allocate() reads pinned_mode_ while holding
    // the same lock.
    void set_pin_mode(bool pinned) {
        std::lock_guard<std::mutex> lock(mutex_);
        pinned_mode_ = pinned;
    }

    // internal use only, force set in_use flag for a mem block
    // return the size of the block
    size_t mark_in_use_(void *ptr, bool in_use);

    // trim cached blocks back to GPU (not pinned)
    void trim();

private:
    Device device_;

    bool pinned_mode_ = false;

    std::vector<SizeClass> size_classes_;
    std::vector<std::shared_ptr<Block>> large_blocks_;
    std::unordered_map<void *, std::shared_ptr<Block>> all_blocks_;

    std::mutex mutex_; // Thread safety
};
}
// namespace infinicore
src/infinicore/context/allocators/
device_caching
_allocator.cc
→
src/infinicore/context/allocators/
stream_ordered
_allocator.cc
View file @
8d09630a
#include "
device_caching
_allocator.hpp"
#include "
stream_ordered
_allocator.hpp"
#include <infinirt.h>
#include "../../utils.hpp"
namespace
infinicore
{
DeviceCachingAllocator
::
DeviceCaching
Allocator
(
Device
device
)
:
MemoryAllocator
(),
device_
(
device
)
{}
StreamOrderedAllocator
::
StreamOrdered
Allocator
(
Device
device
)
:
MemoryAllocator
(),
device_
(
device
)
{}
std
::
byte
*
DeviceCachingAllocator
::
allocate
(
size_t
size
)
{
std
::
byte
*
StreamOrderedAllocator
::
allocate
(
size_t
size
)
{
if
(
size
==
0
)
{
return
nullptr
;
}
void
*
ptr
=
nullptr
;
INFINICORE_CHECK_ERROR
(
infinirtMallocAsync
(
&
ptr
,
size
,
context
::
getStream
()));
return
(
std
::
byte
*
)
ptr
;
}
void
DeviceCachingAllocator
::
deallocate
(
std
::
byte
*
ptr
)
{
void
StreamOrderedAllocator
::
deallocate
(
std
::
byte
*
ptr
)
{
if
(
ptr
==
nullptr
)
{
return
;
}
INFINICORE_CHECK_ERROR
(
infinirtFreeAsync
(
ptr
,
context
::
getStream
()));
}
}
// namespace infinicore
Prev
1
2
3
4
5
6
7
8
…
20
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment