ox696c / ktransformers · Commit 877aec85 (unverified)

Merge branch 'kvcache-ai:main' into main

Authored Apr 09, 2025 by Yuhao Tsui; committed by GitHub on Apr 09, 2025.
Parents: 84164f58, 9037bf30
Changes: 251 files in this commit (the view is paginated across 13 pages). This page shows 20 changed files with 1581 additions and 0 deletions (+1581 −0):
  csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu-mt.cpp          +68  −0
  csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu.cpp            +160  −0
  csrc/balance_serve/kvc2/test/kvc2test/lookup-mt.cpp             +103  −0
  csrc/balance_serve/kvc2/test/kvc2test/lookup-without-vcache.cpp  +84  −0
  csrc/balance_serve/kvc2/test/kvc2test/lookup.cpp                 +90  −0
  csrc/balance_serve/kvc2/test/kvc2test/raw_insert_read.cpp        +99  −0
  csrc/balance_serve/kvc2/test/kvcache_disk_insert_read_test.cpp   +87  −0
  csrc/balance_serve/kvc2/test/kvcache_mem_eviction_test.cpp       +52  −0
  csrc/balance_serve/kvc2/test/kvcache_mem_insert_read_test.cpp   +104  −0
  csrc/balance_serve/kvc2/test/kvcache_save_load_test.cpp         +102  −0
  csrc/balance_serve/kvc2/test/kvcache_test_utils.cpp               +0  −0
  csrc/balance_serve/kvc2/test/page_pool_test.cpp                  +57  −0
  csrc/balance_serve/kvc2/test/prefix_test.cpp                      +0  −0
  csrc/balance_serve/kvc2/test/pytest_load.py                      +61  −0
  csrc/balance_serve/kvc2/test/pytest_mem_prefix_test.py           +83  −0
  csrc/balance_serve/kvc2/test/pytest_mem_read.py                  +72  −0
  csrc/balance_serve/kvc2/test/pytest_raw_insert_and_read.py       +69  −0
  csrc/balance_serve/kvc2/test/test_align.py                       +32  −0
  csrc/balance_serve/kvc2/test/test_cuda_stream.cpp               +145  −0
  csrc/balance_serve/kvc2/test/test_cuda_stream_manager.cpp       +113  −0
csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu-mt.cpp (new file, mode 100644)

```cpp
/**
 * @Description  :
 * @Author       : Xie Weiyu
 * @Date         : 2024-11-22 09:52:48
 * @Version      : 1.0.0
 * @LastEditors  : Xie Weiyu
 * @LastEditTime : 2024-11-25 07:51:09
 * @Copyright (c) 2024 by KVCache.AI, All Rights Reserved.
 **/
#include "common.hpp"

int main(int argc, char* argv[]) {
  init(argc, argv);
  spdlog::set_level(spdlog::level::debug);

  auto kvc2 = kvc2::create_kvc2(config);

  std::mt19937 gen(123);
  auto ids1 = random_ids(10 * config.num_token_per_page, gen);
  auto k1 = random_kvcache(10, gen);
  auto v1 = random_kvcache(10, gen);
  kvc2->raw_insert(test_model_name, test_quant_type, ids1.data(), ids1.size(), k1, v1);

  // complete same
#pragma omp parallel for
  for (size_t ti = 0; ti < 3; ti++) {
    auto h = kvc2->lookup_to_gpu(test_model_name, test_quant_type, ids1.data(), ids1.size(),
                                 ids1.size() + 2 * config.num_token_per_page);
    auto k = h->handle_data(true);
    auto v = h->handle_data(false);
    cmp_handle_data(k1, k, 10);
    cmp_handle_data(v1, v, 10);

    auto block_idx = h->get_gpu_block_idx();
    auto [kcache, vcache] = kvc2->get_kvcache();
    auto k_from_gpu = empty_kvcache(15);
    auto v_from_gpu = empty_kvcache(15);
    size_t gpu_count = config.gpu_cache_config->gpu_devices_id.size();
    size_t element_size_per_gpu = test_cache_info.element_size(config.num_token_per_page) / gpu_count;
    for (size_t i = 0; i < k_from_gpu.size(); i++) {
      for (size_t j = 0; j < block_idx.size(); j++) {
        size_t b_idx = block_idx[j];
        for (size_t gpu_idx = 0; gpu_idx < gpu_count; gpu_idx++) {
          {
            auto kt = kcache[gpu_idx][i][b_idx].to(torch::kCPU);
            void* src = kt.data_ptr();
            void* dst = offset_by_bytes(k_from_gpu[i][j], gpu_idx * element_size_per_gpu);
            memcpy(dst, src, element_size_per_gpu);
          }
          {
            auto vt = vcache[gpu_idx][i][b_idx].to(torch::kCPU);
            void* src = vt.data_ptr();
            void* dst = offset_by_bytes(v_from_gpu[i][j], gpu_idx * element_size_per_gpu);
            memcpy(dst, src, element_size_per_gpu);
          }
        }
      }
    }
    cmp_handle_data(k1, k_from_gpu, 10);
    cmp_handle_data(v1, v_from_gpu, 10);
  }

  SPDLOG_CRITICAL("All Test Passed: {}", argv[0]);
  return 0;
}
```
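The copy-back loops above rebuild each page on the host with `offset_by_bytes`, which is not shown on this page of the diff. Judging purely from the call sites (an untyped block pointer plus a byte offset, used as a `memcpy` destination), a minimal sketch of a compatible helper could be:

```cpp
// Hypothetical sketch, not part of this commit: advance an untyped pointer
// by a byte count via a char* cast, matching how the tests above call it.
inline void* offset_by_bytes(void* base, size_t n_bytes) {
  return static_cast<char*>(base) + n_bytes;
}
```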
csrc/balance_serve/kvc2/test/kvc2test/lookup-gpu.cpp (new file, mode 100644)

```cpp
/**
 * @Description  :
 * @Author       : Xie Weiyu
 * @Date         : 2024-11-22 09:52:48
 * @Version      : 1.0.0
 * @LastEditors  : Xie Weiyu
 * @LastEditTime : 2024-11-25 08:38:33
 * @Copyright (c) 2024 by KVCache.AI, All Rights Reserved.
 **/
#include "common.hpp"

int main(int argc, char* argv[]) {
  init(argc, argv);
  spdlog::set_level(spdlog::level::debug);

  auto kvc2 = kvc2::create_kvc2(config);

  std::mt19937 gen(123);
  auto ids1 = random_ids(10 * config.num_token_per_page, gen);
  auto k1 = random_kvcache(10, gen);
  auto v1 = random_kvcache(10, gen);
  kvc2->raw_insert(test_model_name, test_quant_type, ids1.data(), ids1.size(), k1, v1);

  // complete same
  {
    auto h = kvc2->lookup_to_gpu(test_model_name, test_quant_type, ids1.data(), ids1.size(),
                                 ids1.size() + 5 * config.num_token_per_page);
    auto k = h->handle_data(true);
    auto v = h->handle_data(false);
    cmp_handle_data(k1, k, 10);
    cmp_handle_data(v1, v, 10);

    auto block_idx = h->get_gpu_block_idx();
    auto [kcache, vcache] = kvc2->get_kvcache();
    auto k_from_gpu = empty_kvcache(15);
    auto v_from_gpu = empty_kvcache(15);
    size_t gpu_count = config.gpu_cache_config->gpu_devices_id.size();
    size_t element_size_per_gpu = test_cache_info.element_size(config.num_token_per_page) / gpu_count;
    for (size_t i = 0; i < k_from_gpu.size(); i++) {
      for (size_t j = 0; j < block_idx.size(); j++) {
        size_t b_idx = block_idx[j];
        for (size_t gpu_idx = 0; gpu_idx < gpu_count; gpu_idx++) {
          {
            auto kt = kcache[gpu_idx][i][b_idx].to(torch::kCPU);
            void* src = kt.data_ptr();
            void* dst = offset_by_bytes(k_from_gpu[i][j], gpu_idx * element_size_per_gpu);
            memcpy(dst, src, element_size_per_gpu);
          }
          {
            auto vt = vcache[gpu_idx][i][b_idx].to(torch::kCPU);
            void* src = vt.data_ptr();
            void* dst = offset_by_bytes(v_from_gpu[i][j], gpu_idx * element_size_per_gpu);
            memcpy(dst, src, element_size_per_gpu);
          }
        }
      }
    }
    cmp_handle_data(k1, k_from_gpu, 10);
    cmp_handle_data(v1, v_from_gpu, 10);
  }

  // prefix and evict
  {
    auto h = kvc2->lookup_to_gpu(test_model_name, test_quant_type, ids1.data(), config.num_token_per_page * 3,
                                 config.gpu_cache_config->total_kvcache_pages * config.num_token_per_page);
    auto k = h->handle_data(true);
    auto v = h->handle_data(false);
    cmp_handle_data(k1, k, 3);
    cmp_handle_data(v1, v, 3);

    auto block_idx = h->get_gpu_block_idx();
    auto [kcache, vcache] = kvc2->get_kvcache();
    auto k_from_gpu = empty_kvcache(3);
    auto v_from_gpu = empty_kvcache(3);
    size_t gpu_count = config.gpu_cache_config->gpu_devices_id.size();
    size_t element_size_per_gpu = test_cache_info.element_size(config.num_token_per_page) / gpu_count;
    for (size_t i = 0; i < k_from_gpu.size(); i++) {
      for (size_t j = 0; j < 3; j++) {
        size_t b_idx = block_idx[j];
        for (size_t gpu_idx = 0; gpu_idx < gpu_count; gpu_idx++) {
          {
            auto kt = kcache[gpu_idx][i][b_idx].to(torch::kCPU);
            void* src = kt.data_ptr();
            void* dst = offset_by_bytes(k_from_gpu[i][j], gpu_idx * element_size_per_gpu);
            memcpy(dst, src, element_size_per_gpu);
          }
          {
            auto vt = vcache[gpu_idx][i][b_idx].to(torch::kCPU);
            void* src = vt.data_ptr();
            void* dst = offset_by_bytes(v_from_gpu[i][j], gpu_idx * element_size_per_gpu);
            memcpy(dst, src, element_size_per_gpu);
          }
        }
      }
    }
    cmp_handle_data(k1, k_from_gpu, 3);
    cmp_handle_data(v1, v_from_gpu, 3);
  }

  // // complete prefix
  // {
  //   std::vector<Token> ids2(ids1.begin(), ids1.begin() + 3 * config.num_token_per_page);
  //   auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(),
  //                         ids2.size() + 3 * config.num_token_per_page);
  //   auto k = h->handle_data(true);
  //   auto v = h->handle_data(false);
  //   cmp_handle_data(k1, k, 3);
  //   cmp_handle_data(v1, v, 3);
  // }
  // // common prefix
  // {
  //   std::vector<Token> ids2(ids1.begin(), ids1.begin() + 3 * config.num_token_per_page);
  //   auto rids = random_ids(config.num_token_per_page * 2 + config.num_token_per_page / 2, gen);
  //   ids2.insert(ids2.end(), rids.begin(), rids.end());
  //   auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(), ids2.size());
  //   auto k = h->handle_data(true);
  //   auto v = h->handle_data(false);
  //   cmp_handle_data(k1, k, 3);
  //   cmp_handle_data(v1, v, 3);
  // }
  // // no prefix
  // {
  //   std::vector<Token> ids2 = random_ids(config.num_token_per_page, gen);
  //   auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(), ids2.size());
  //   assert(h->matched_length() == 0);
  // }
  // // insert partly new
  // auto k2 = random_kvcache(10, gen);
  // auto v2 = random_kvcache(10, gen);
  // copy_kvcache(k1, k2, 0, 5);
  // copy_kvcache(v1, v2, 0, 5);
  // auto ids2 = random_ids(10 * config.num_token_per_page, gen);
  // for (size_t i = 0; i < 5 * config.num_token_per_page; i++) {
  //   ids2[i] = ids1[i];
  // }
  // kvc2->raw_insert(test_model_name, test_quant_type, ids2.data(), ids2.size(), k2, v2);
  // // read new part
  // {
  //   std::vector<Token> ids(ids2.begin(), ids2.begin() + 7 * config.num_token_per_page);
  //   auto h = kvc2->lookup(test_model_name, test_quant_type, ids.data(), ids.size(),
  //                         ids.size() + 7 * config.num_token_per_page);
  //   auto k = h->handle_data(true);
  //   auto v = h->handle_data(false);
  //   cmp_handle_data(k, k2, 7);
  //   cmp_handle_data(v, v2, 7);
  // }

  SPDLOG_CRITICAL("All Test Passed: {}", argv[0]);
  return 0;
}
```
csrc/balance_serve/kvc2/test/kvc2test/lookup-mt.cpp (new file, mode 100644)

```cpp
/**
 * @Description  :
 * @Author       : Xie Weiyu
 * @Date         : 2024-11-22 08:48:40
 * @Version      : 1.0.0
 * @LastEditors  : Xie Weiyu
 * @LastEditTime : 2024-11-22 09:53:06
 * @Copyright (c) 2024 by KVCache.AI, All Rights Reserved.
 **/
#include "common.hpp"

template <typename F>
void test_multi(F f) {
  std::vector<std::thread> threads;
  for (size_t i = 0; i < 10; i++) {
    // emplace_back is required here: std::thread's converting constructor is
    // explicit, so push_back cannot implicitly convert the lambda to a thread.
    threads.emplace_back([f]() { f(); });
  }
  for (auto& t : threads) {
    t.join();
  }
}

int main(int argc, char* argv[]) {
  init(argc, argv);
  spdlog::set_level(spdlog::level::debug);

  auto kvc2 = kvc2::create_kvc2(config);

  std::mt19937 gen(123);
  auto ids1 = random_ids(3 * config.num_token_per_page, gen);
  auto k1 = random_kvcache(3, gen);
  auto v1 = random_kvcache(3, gen);
  kvc2->raw_insert(test_model_name, test_quant_type, ids1.data(), ids1.size(), k1, v1);

  // complete same
  {
#pragma omp parallel for
    for (size_t i = 0; i < 10; i++) {
      auto h = kvc2->lookup(test_model_name, test_quant_type, ids1.data(), ids1.size(),
                            ids1.size() + 10 * config.num_token_per_page);
      if (h == nullptr) {
        SPDLOG_WARN("Thread[{}]: h is nullptr", i);
      } else {
        auto k = h->handle_data(true);
        auto v = h->handle_data(false);
        cmp_handle_data(k1, k, 3);
        cmp_handle_data(v1, v, 3);
      }
    }
  }

  // // complete prefix
  // {
  //   std::vector<Token> ids2(ids1.begin(), ids1.begin() + 3 * config.num_token_per_page);
  //   auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(),
  //                         ids2.size() + 3 * config.num_token_per_page);
  //   auto k = h->handle_data(true);
  //   auto v = h->handle_data(false);
  //   cmp_handle_data(k1, k, 3);
  //   cmp_handle_data(v1, v, 3);
  // }
  // // common prefix
  // {
  //   std::vector<Token> ids2(ids1.begin(), ids1.begin() + 3 * config.num_token_per_page);
  //   auto rids = random_ids(config.num_token_per_page * 2 + config.num_token_per_page / 2, gen);
  //   ids2.insert(ids2.end(), rids.begin(), rids.end());
  //   auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(), ids2.size());
  //   auto k = h->handle_data(true);
  //   auto v = h->handle_data(false);
  //   cmp_handle_data(k1, k, 3);
  //   cmp_handle_data(v1, v, 3);
  // }
  // // no prefix
  // {
  //   std::vector<Token> ids2 = random_ids(config.num_token_per_page, gen);
  //   auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(), ids2.size());
  //   assert(h->matched_length() == 0);
  // }
  // // insert partly new
  // auto k2 = random_kvcache(10, gen);
  // auto v2 = random_kvcache(10, gen);
  // copy_kvcache(k1, k2, 0, 5);
  // copy_kvcache(v1, v2, 0, 5);
  // auto ids2 = random_ids(10 * config.num_token_per_page, gen);
  // for (size_t i = 0; i < 5 * config.num_token_per_page; i++) {
  //   ids2[i] = ids1[i];
  // }
  // kvc2->raw_insert(test_model_name, test_quant_type, ids2.data(), ids2.size(), k2, v2);
  // // read new part
  // {
  //   std::vector<Token> ids(ids2.begin(), ids2.begin() + 7 * config.num_token_per_page);
  //   auto h = kvc2->lookup(test_model_name, test_quant_type, ids.data(), ids.size(),
  //                         ids.size() + 7 * config.num_token_per_page);
  //   auto k = h->handle_data(true);
  //   auto v = h->handle_data(false);
  //   cmp_handle_data(k, k2, 7);
  //   cmp_handle_data(v, v2, 7);
  // }

  kvc2->debug();
  SPDLOG_CRITICAL("All Test Passed: {}", argv[0]);
  return 0;
}
```
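Note that `test_multi` is defined in this file but the checks in `main` run under OpenMP instead. For reference, a sketch of how the same lookup check could be driven through `test_multi` (the wrapping below is an illustration reusing names from this file, not part of the commit):

```cpp
// Sketch only: run the same lookup/compare body on 10 std::threads.
test_multi([&]() {
  auto h = kvc2->lookup(test_model_name, test_quant_type, ids1.data(), ids1.size(),
                        ids1.size() + 10 * config.num_token_per_page);
  if (h != nullptr) {
    cmp_handle_data(k1, h->handle_data(true), 3);
    cmp_handle_data(v1, h->handle_data(false), 3);
  }
});
```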
csrc/balance_serve/kvc2/test/kvc2test/lookup-without-vcache.cpp (new file, mode 100644)

```cpp
/**
 * @Description  :
 * @Author       : Xie Weiyu
 * @Date         : 2024-11-22 08:29:45
 * @Version      : 1.0.0
 * @LastEditors  : Xie Weiyu
 * @LastEditTime : 2024-11-22 09:56:12
 * @Copyright (c) 2024 by KVCache.AI, All Rights Reserved.
 **/
#include "common.hpp"

int main(int argc, char* argv[]) {
  qw25_7B_gpu_config.v_cache_on = false;
  config.gpu_cache_config = qw25_7B_gpu_config;
  config.v_cache_on = false;
  init(argc, argv);
  spdlog::set_level(spdlog::level::debug);

  auto kvc2 = kvc2::create_kvc2(config);

  std::mt19937 gen(123);
  auto ids1 = random_ids(10 * config.num_token_per_page, gen);
  auto k1 = random_kvcache(10, gen);
  // auto v1 = random_kvcache(10, gen);
  kvc2->raw_insert(test_model_name, test_quant_type, ids1.data(), ids1.size(), k1, {});

  // complete same
  {
    auto h = kvc2->lookup(test_model_name, test_quant_type, ids1.data(), ids1.size(),
                          ids1.size() + 10 * config.num_token_per_page);
    auto k = h->handle_data(true);
    cmp_handle_data(k1, k, 10);
  }
  // complete prefix
  {
    std::vector<Token> ids2(ids1.begin(), ids1.begin() + 3 * config.num_token_per_page);
    auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(),
                          ids2.size() + 3 * config.num_token_per_page);
    auto k = h->handle_data(true);
    cmp_handle_data(k1, k, 3);
  }
  // common prefix
  {
    std::vector<Token> ids2(ids1.begin(), ids1.begin() + 3 * config.num_token_per_page);
    auto rids = random_ids(config.num_token_per_page * 2 + config.num_token_per_page / 2, gen);
    ids2.insert(ids2.end(), rids.begin(), rids.end());
    auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(), ids2.size());
    auto k = h->handle_data(true);
    cmp_handle_data(k1, k, 3);
  }
  // no prefix
  {
    std::vector<Token> ids2 = random_ids(config.num_token_per_page, gen);
    auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(), ids2.size());
    assert(h->matched_length() == 0);
  }
  // insert partly new
  auto k2 = random_kvcache(10, gen);
  copy_kvcache(k1, k2, 0, 5);
  auto ids2 = random_ids(10 * config.num_token_per_page, gen);
  for (size_t i = 0; i < 5 * config.num_token_per_page; i++) {
    ids2[i] = ids1[i];
  }
  kvc2->raw_insert(test_model_name, test_quant_type, ids2.data(), ids2.size(), k2, {});
  // read new part
  {
    std::vector<Token> ids(ids2.begin(), ids2.begin() + 7 * config.num_token_per_page);
    auto h = kvc2->lookup(test_model_name, test_quant_type, ids.data(), ids.size(),
                          ids.size() + 7 * config.num_token_per_page);
    auto k = h->handle_data(true);
    cmp_handle_data(k, k2, 7);
  }

  SPDLOG_CRITICAL("All Test Passed: {}", argv[0]);
  return 0;
}
```
csrc/balance_serve/kvc2/test/kvc2test/lookup.cpp (new file, mode 100644)

```cpp
/**
 * @Description  :
 * @Author       : Xie Weiyu
 * @Date         : 2024-11-22 08:29:45
 * @Version      : 1.0.0
 * @LastEditors  : Xie Weiyu
 * @LastEditTime : 2024-11-22 09:56:12
 * @Copyright (c) 2024 by KVCache.AI, All Rights Reserved.
 **/
#include "common.hpp"

int main(int argc, char* argv[]) {
  init(argc, argv);
  spdlog::set_level(spdlog::level::debug);

  auto kvc2 = kvc2::create_kvc2(config);

  std::mt19937 gen(123);
  auto ids1 = random_ids(10 * config.num_token_per_page, gen);
  auto k1 = random_kvcache(10, gen);
  auto v1 = random_kvcache(10, gen);
  kvc2->raw_insert(test_model_name, test_quant_type, ids1.data(), ids1.size(), k1, v1);

  // complete same
  {
    auto h = kvc2->lookup(test_model_name, test_quant_type, ids1.data(), ids1.size(),
                          ids1.size() + 10 * config.num_token_per_page);
    auto k = h->handle_data(true);
    auto v = h->handle_data(false);
    cmp_handle_data(k1, k, 10);
    cmp_handle_data(v1, v, 10);
  }
  // complete prefix
  {
    std::vector<Token> ids2(ids1.begin(), ids1.begin() + 3 * config.num_token_per_page);
    auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(),
                          ids2.size() + 3 * config.num_token_per_page);
    auto k = h->handle_data(true);
    auto v = h->handle_data(false);
    cmp_handle_data(k1, k, 3);
    cmp_handle_data(v1, v, 3);
  }
  // common prefix
  {
    std::vector<Token> ids2(ids1.begin(), ids1.begin() + 3 * config.num_token_per_page);
    auto rids = random_ids(config.num_token_per_page * 2 + config.num_token_per_page / 2, gen);
    ids2.insert(ids2.end(), rids.begin(), rids.end());
    auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(), ids2.size());
    auto k = h->handle_data(true);
    auto v = h->handle_data(false);
    cmp_handle_data(k1, k, 3);
    cmp_handle_data(v1, v, 3);
  }
  // no prefix
  {
    std::vector<Token> ids2 = random_ids(config.num_token_per_page, gen);
    auto h = kvc2->lookup(test_model_name, test_quant_type, ids2.data(), ids2.size(), ids2.size());
    assert(h->matched_length() == 0);
  }
  // insert partly new
  auto k2 = random_kvcache(10, gen);
  auto v2 = random_kvcache(10, gen);
  copy_kvcache(k1, k2, 0, 5);
  copy_kvcache(v1, v2, 0, 5);
  auto ids2 = random_ids(10 * config.num_token_per_page, gen);
  for (size_t i = 0; i < 5 * config.num_token_per_page; i++) {
    ids2[i] = ids1[i];
  }
  kvc2->raw_insert(test_model_name, test_quant_type, ids2.data(), ids2.size(), k2, v2);
  // read new part
  {
    std::vector<Token> ids(ids2.begin(), ids2.begin() + 7 * config.num_token_per_page);
    auto h = kvc2->lookup(test_model_name, test_quant_type, ids.data(), ids.size(),
                          ids.size() + 7 * config.num_token_per_page);
    auto k = h->handle_data(true);
    auto v = h->handle_data(false);
    cmp_handle_data(k, k2, 7);
    cmp_handle_data(v, v2, 7);
  }

  SPDLOG_CRITICAL("All Test Passed: {}", argv[0]);
  return 0;
}
```
csrc/balance_serve/kvc2/test/kvc2test/raw_insert_read.cpp (new file, mode 100644)

```cpp
/**
 * @Description  :
 * @Author       : Xie Weiyu
 * @Date         : 2024-11-22 06:00:16
 * @Version      : 1.0.0
 * @LastEditors  : Xie Weiyu
 * @LastEditTime : 2024-11-22 07:30:46
 * @Copyright (c) 2024 by KVCache.AI, All Rights Reserved.
 **/
#include "common.hpp"

int main(int argc, char* argv[]) {
  init(argc, argv);
  spdlog::set_level(spdlog::level::debug);

  auto kvc2 = kvc2::create_kvc2(config);

  std::mt19937 gen(123);
  auto ids1 = random_ids(10 * config.num_token_per_page, gen);
  auto k1 = random_kvcache(10, gen);
  auto v1 = random_kvcache(10, gen);
  kvc2->raw_insert(test_model_name, test_quant_type, ids1.data(), ids1.size(), k1, v1);

  // complete same
  {
    auto k2 = empty_kvcache(10);
    auto v2 = empty_kvcache(10);
    auto l2 = kvc2->raw_read(test_model_name, test_quant_type, ids1.data(), ids1.size(), k2, v2);
    assert(l2 == ids1.size());
    cmp_handle_data(k1, k2);
    cmp_handle_data(v1, v2);
  }
  // complete prefix
  {
    auto k2 = empty_kvcache(10);
    auto v2 = empty_kvcache(10);
    std::vector<Token> ids2 = std::vector<Token>(ids1.begin(), ids1.begin() + 3 * config.num_token_per_page);
    auto l2 = kvc2->raw_read(test_model_name, test_quant_type, ids2.data(), ids2.size(), k2, v2);
    assert(l2 == 3 * config.num_token_per_page);
    cmp_handle_data(k1, k2, 3);
    cmp_handle_data(v1, v2, 3);
  }
  // common prefix
  {
    auto k2 = empty_kvcache(10);
    auto v2 = empty_kvcache(10);
    std::vector<Token> ids2 = std::vector<Token>(ids1.begin(), ids1.begin() + 3 * config.num_token_per_page);
    auto rids = random_ids(config.num_token_per_page * 2 + config.num_token_per_page / 2, gen);
    ids2.insert(ids2.end(), rids.begin(), rids.end());
    auto l2 = kvc2->raw_read(test_model_name, test_quant_type, ids2.data(), ids2.size(), k2, v2);
    assert(l2 == 3 * config.num_token_per_page);
    cmp_handle_data(k1, k2, 3);
    cmp_handle_data(v1, v2, 3);
  }
  // no prefix
  {
    auto k2 = empty_kvcache(1);
    auto v2 = empty_kvcache(1);
    std::vector<Token> ids2 = random_ids(config.num_token_per_page, gen);
    auto l2 = kvc2->raw_read(test_model_name, test_quant_type, ids2.data(), ids2.size(), k2, v2);
    assert(l2 == 0);
  }
  // insert partly new
  auto k2 = random_kvcache(10, gen);
  auto v2 = random_kvcache(10, gen);
  copy_kvcache(k1, k2, 0, 5);
  copy_kvcache(v1, v2, 0, 5);
  auto ids2 = random_ids(10 * config.num_token_per_page, gen);
  for (size_t i = 0; i < 5 * config.num_token_per_page; i++) {
    ids2[i] = ids1[i];
  }
  kvc2->raw_insert(test_model_name, test_quant_type, ids2.data(), ids2.size(), k2, v2);
  // read new part
  {
    auto k = empty_kvcache(10);
    auto v = empty_kvcache(10);
    std::vector<Token> ids = std::vector<Token>(ids2.begin(), ids2.begin() + 7 * config.num_token_per_page);
    auto l = kvc2->raw_read(test_model_name, test_quant_type, ids.data(), ids.size(), k, v);
    assert(l == 7 * config.num_token_per_page);
    cmp_handle_data(k, k2, 7);
    cmp_handle_data(v, v2, 7);
  }

  SPDLOG_CRITICAL("All Test Passed: {}", argv[0]);
  return 0;
}
```
csrc/balance_serve/kvc2/test/kvcache_disk_insert_read_test.cpp (new file, mode 100644)

```cpp
#include "kvcache_test_utils.cpp"

int main(int argc, char* argv[]) {
  parse_and_check(argc, argv);
  spdlog::set_level(spdlog::level::debug);

  std::mt19937 gen(123);
  KVC2 kvc2(FLAGS_disk_cache_path);
  // auto io = kvc2.io_dealer->start_io_thread();
  kvc2.io_dealer->start_io_thread().detach();

  auto h1 = random_kvcache(qwen_cache_info, 10, gen);
  h1.ids = random_ids(10 * BlockLength, gen);
  kvc2.raw_insert(h1);

  // complete same
  {
    auto h2 = empty_kvcache(qwen_cache_info, 10);
    h2.ids = h1.ids;
    kvc2.raw_read(h2);
    assert(static_cast<size_t>(h2.match.match_length) == h1.ids.size());
    cmp_handle_data(h1, h2);
  }
  // complete prefix
  {
    auto h2 = empty_kvcache(qwen_cache_info, 10);
    h2.ids = std::vector<ID>(h1.ids.begin(), h1.ids.begin() + 3 * BlockLength);
    kvc2.raw_read(h2);
    assert(h2.match.match_length == 3 * BlockLength);
    cmp_handle_data(h1, h2, 3);
  }
  // common prefix
  {
    auto h2 = empty_kvcache(qwen_cache_info, 10);
    h2.ids = std::vector<ID>(h1.ids.begin(), h1.ids.begin() + 5 * BlockLength);
    auto rids = random_ids(BlockLength * 2 + BlockLength / 2, gen);
    h2.ids.insert(h2.ids.end(), rids.begin(), rids.end());
    kvc2.raw_read(h2);
    assert(h2.match.match_length == 5 * BlockLength);
    cmp_handle_data(h1, h2, 5);
  }
  // no prefix
  {
    auto h2 = empty_kvcache(qwen_cache_info, 10);
    h2.ids = random_ids(10 * BlockLength, gen);
    kvc2.raw_read(h2);
    assert(h2.match.match_length == 0);
  }
  // insert partly new
  auto h2 = random_kvcache(qwen_cache_info, 10, gen);
  copy_kvcache(h1, h2, 0, 5);
  h2.ids = random_ids(10 * BlockLength, gen);
  for (size_t i = 0; i < 5 * BlockLength; i++) {
    h2.ids[i] = h1.ids[i];
  }
  kvc2.raw_insert(h2);
  // read new part
  {
    auto h = empty_kvcache(qwen_cache_info, 10);
    h.ids = std::vector<ID>(h2.ids.begin(), h2.ids.begin() + 7 * BlockLength);
    h.ids.push_back(123);
    kvc2.raw_read(h);
    assert(h.match.match_length == 7 * BlockLength);
    cmp_handle_data(h, h2, 7);
  }

  kvc2.tree->debug();
  kvc2.io_dealer->stop();
  // io.join();
  SPDLOG_WARN("{} Test Passed", __FILE__);
  return 0;
}
```
csrc/balance_serve/kvc2/test/kvcache_mem_eviction_test.cpp (new file, mode 100644)

```cpp
#include "kvcache_test_utils.cpp"

int main(int argc, char* argv[]) {
  parse_and_check(argc, argv);
  spdlog::set_level(spdlog::level::debug);

  std::mt19937 gen(123);
  KVC2 kvc2(FLAGS_disk_cache_path);
  auto io = kvc2.io_dealer->start_io_thread();

  SPDLOG_WARN("Insert 10 x 10 KVCache");
  std::vector<KVCacheHandle> handles(10);
  for (int i = 0; i < 10; i++) {
    handles[i] = random_kvcache(qwen_cache_info, 10, gen);
    auto& h1 = handles[i];
    h1.ids = random_ids(10 * BlockLength, gen);
    kvc2.raw_insert(h1);
  }

  SPDLOG_WARN("Cache Eviction Test");
  {
    for (int i = 0; i < 10; i++) {
      auto& h = handles[i];
      SPDLOG_WARN("Lookup {}", i);
      auto x = kvc2.lookup(qwen_cache_info, h.ids.data(), h.ids.size());
      cmp_handle_data(h, *x);
    }
    SPDLOG_WARN("Simple Eviction OK");
  }
  {
    std::vector<std::shared_ptr<KVCacheHandle>> lookup_handles;
    for (int i = 0; i < 10; i++) {
      auto& h = handles[i];
      SPDLOG_WARN("Lookup {}", i);
      auto x = kvc2.lookup(qwen_cache_info, h.ids.data(), h.ids.size());
      if (i >= 5) {
        assert(x == nullptr);
        continue;
      }
      lookup_handles.push_back(x);
      cmp_handle_data(h, *x);
    }
    SPDLOG_WARN("Cannot Eviction OK");
  }

  kvc2.io_dealer->stop();
  io.join();
  SPDLOG_WARN("{} Test Passed", __FILE__);
  return 0;
}
```
csrc/balance_serve/kvc2/test/kvcache_mem_insert_read_test.cpp (new file, mode 100644)

```cpp
#include "kvcache_test_utils.cpp"

int main(int argc, char* argv[]) {
  parse_and_check(argc, argv);
  spdlog::set_level(spdlog::level::debug);

  std::mt19937 gen(123);
  KVC2 kvc2(FLAGS_disk_cache_path);
  auto io = kvc2.io_dealer->start_io_thread();

  SPDLOG_INFO("Disk Test");
  auto h1 = random_kvcache(qwen_cache_info, 10, gen);
  h1.ids = random_ids(10 * BlockLength, gen);
  kvc2.raw_insert(h1);

  // complete same
  {
    auto h2 = empty_kvcache(qwen_cache_info, 10);
    h2.ids = h1.ids;
    kvc2.raw_read(h2);
    assert(static_cast<size_t>(h2.match.match_length) == h1.ids.size());
    cmp_handle_data(h1, h2);
  }
  // complete prefix
  {
    auto h2 = empty_kvcache(qwen_cache_info, 10);
    h2.ids = std::vector<ID>(h1.ids.begin(), h1.ids.begin() + 3 * BlockLength);
    kvc2.raw_read(h2);
    assert(h2.match.match_length == 3 * BlockLength);
    cmp_handle_data(h1, h2, 3);
  }
  // common prefix
  {
    auto h2 = empty_kvcache(qwen_cache_info, 10);
    h2.ids = std::vector<ID>(h1.ids.begin(), h1.ids.begin() + 5 * BlockLength);
    auto rids = random_ids(BlockLength * 2 + BlockLength / 2, gen);
    h2.ids.insert(h2.ids.end(), rids.begin(), rids.end());
    kvc2.raw_read(h2);
    assert(h2.match.match_length == 5 * BlockLength);
    cmp_handle_data(h1, h2, 5);
  }
  // no prefix
  {
    auto h2 = empty_kvcache(qwen_cache_info, 10);
    h2.ids = random_ids(10 * BlockLength, gen);
    kvc2.raw_read(h2);
    assert(h2.match.match_length == 0);
  }
  // insert partly new
  auto h2 = random_kvcache(qwen_cache_info, 10, gen);
  copy_kvcache(h1, h2, 0, 5);
  h2.ids = random_ids(10 * BlockLength, gen);
  for (size_t i = 0; i < 5 * BlockLength; i++) {
    h2.ids[i] = h1.ids[i];
  }
  kvc2.raw_insert(h2);
  // read new part
  {
    auto h = empty_kvcache(qwen_cache_info, 10);
    h.ids = std::vector<ID>(h2.ids.begin(), h2.ids.begin() + 7 * BlockLength);
    h.ids.push_back(123);
    kvc2.raw_read(h);
    assert(h.match.match_length == 7 * BlockLength);
    cmp_handle_data(h, h2, 7);
  }

  SPDLOG_WARN("Memory Test");
  {
    auto h = kvc2.lookup(qwen_cache_info, h1.ids.data(), h1.ids.size());
    assert(h);
    cmp_handle_data(h1, *h);
    kvc2.block_cache->debug();
  }
  kvc2.block_cache->debug();
  {
    auto h = kvc2.lookup(qwen_cache_info, h1.ids.data(), 5 * BlockLength);
    assert(h);
    cmp_handle_data(h1, *h, 5);
    kvc2.block_cache->debug();
  }
  kvc2.block_cache->debug();

  kvc2.io_dealer->stop();
  io.join();
  SPDLOG_WARN("{} Test Passed", __FILE__);
  return 0;
}
```
csrc/balance_serve/kvc2/test/kvcache_save_load_test.cpp (new file, mode 100644)

```cpp
#include "kvcache_test_utils.cpp"

int main(int argc, char* argv[]) {
  parse_and_check(argc, argv);
  spdlog::set_level(spdlog::level::debug);

  std::mt19937 gen(123);
  std::vector<KVCacheHandle> handles(10);

  {
    KVC2 kvc2(FLAGS_disk_cache_path);
    auto io = kvc2.io_dealer->start_io_thread();
    SPDLOG_WARN("Insert 10 x 10 KVCache");
    for (int i = 0; i < 10; i++) {
      handles[i] = random_kvcache(qwen_cache_info, 10, gen);
      auto& h1 = handles[i];
      h1.ids = random_ids(10 * BlockLength, gen);
      kvc2.raw_insert(h1);
    }
    kvc2.save();
    kvc2.tree->debug();
    kvc2.io_dealer->stop();
    io.join();
  }

  {
    KVC2 kvc2(FLAGS_disk_cache_path);
    auto io = kvc2.io_dealer->start_io_thread();
    kvc2.load();
    kvc2.tree->debug();
    auto& h1 = handles[0];

    // complete same
    {
      auto h2 = empty_kvcache(qwen_cache_info, 10);
      h2.ids = h1.ids;
      kvc2.raw_read(h2);
      assert(static_cast<size_t>(h2.match.match_length) == h1.ids.size());
      cmp_handle_data(h1, h2);
    }
    // complete prefix
    {
      auto h2 = empty_kvcache(qwen_cache_info, 10);
      h2.ids = std::vector<ID>(h1.ids.begin(), h1.ids.begin() + 3 * BlockLength);
      kvc2.raw_read(h2);
      assert(h2.match.match_length == 3 * BlockLength);
      cmp_handle_data(h1, h2, 3);
    }
    // common prefix
    {
      auto h2 = empty_kvcache(qwen_cache_info, 10);
      h2.ids = std::vector<ID>(h1.ids.begin(), h1.ids.begin() + 5 * BlockLength);
      auto rids = random_ids(BlockLength * 2 + BlockLength / 2, gen);
      h2.ids.insert(h2.ids.end(), rids.begin(), rids.end());
      kvc2.raw_read(h2);
      assert(h2.match.match_length == 5 * BlockLength);
      cmp_handle_data(h1, h2, 5);
    }
    // no prefix
    {
      auto h2 = empty_kvcache(qwen_cache_info, 10);
      h2.ids = random_ids(10 * BlockLength, gen);
      kvc2.raw_read(h2);
      assert(h2.match.match_length == 0);
    }
    // insert partly new
    auto h2 = random_kvcache(qwen_cache_info, 10, gen);
    copy_kvcache(h1, h2, 0, 5);
    h2.ids = random_ids(10 * BlockLength, gen);
    for (size_t i = 0; i < 5 * BlockLength; i++) {
      h2.ids[i] = h1.ids[i];
    }
    kvc2.raw_insert(h2);
    // read new part
    {
      auto h = empty_kvcache(qwen_cache_info, 10);
      h.ids = std::vector<ID>(h2.ids.begin(), h2.ids.begin() + 7 * BlockLength);
      h.ids.push_back(123);
      kvc2.raw_read(h);
      assert(h.match.match_length == 7 * BlockLength);
      cmp_handle_data(h, h2, 7);
    }

    kvc2.io_dealer->stop();
    io.join();
  }

  SPDLOG_WARN("{} Test Passed", __FILE__);
  return 0;
}
```
csrc/balance_serve/kvc2/test/kvcache_test_utils.cpp (new file, mode 100644; no content rendered on this page)
csrc/balance_serve/kvc2/test/page_pool_test.cpp (new file, mode 100644)

```cpp
#include <unistd.h>
#include <iostream>
#include <random>
#include <thread>
#include <vector>
#include "page_aligned_memory_pool.cpp"

#define SPDLOG_ACTIVE_LEVEL SPDLOG_LEVEL_DEBUG
#define FMT_HEADER_ONLY
#include "spdlog/spdlog.h"

// Task executed by each thread
void thread_task(PageAlignedMemoryPool& pool) {
  std::mt19937 gen(123);
  std::vector<std::pair<void*, size_t>> allocated;
  size_t cnt = 40000;
  for (size_t i = 0; i < cnt; ++i) {
    // Pick a random allocation size
    size_t size = (gen() % 100 + 1) * 4096 * 4;
    void* ptr = pool.alloc(size);
    // SPDLOG_DEBUG(pool.debug());
    if (ptr) {
      pool.free(ptr, size);
      // allocated.push_back({ptr, size});
    }
    // sleep((int)(gen() % 1000) / 1000.0);
  }
  // free all memory
  // (allocated stays empty while the push_back above is commented out,
  //  so this loop is currently a no-op)
  for (auto& p : allocated) {
    pool.free(p.first, p.second);
  }
}

int main(int argc, char* argv[]) {
  spdlog::set_level(spdlog::level::debug);

  // Create a memory pool
  PageAlignedMemoryPool pool(40ll * 1024 * 1024 * 1024);  // 40 GiB

  // Spawn worker threads
  const int num_threads = 32;
  std::vector<std::thread> threads;
  for (int i = 0; i < num_threads; ++i) {
    threads.emplace_back(thread_task, std::ref(pool));
  }

  // Wait for all threads to finish
  for (auto& t : threads) {
    t.join();
  }

  // Print debug information
  std::cout << pool.debug() << std::endl;
  return 0;
}
```
csrc/balance_serve/kvc2/test/prefix_test.cpp (new file, mode 100644; no content rendered on this page)
csrc/balance_serve/kvc2/test/pytest_load.py (new file, mode 100644)

```python
import sys

sys.path.append('./build')
sys.path.append('./src')

import torch
import kvc2_ext
from kvc2_utils import get_tensor_from_data_ptr

# Create a kvc2 instance
path = "/mnt/data/kvc2"
kvc2_instance = kvc2_ext.create_kvc2(path, int(10e9))  # 10 GB memory pool
kvc2_ext.load(kvc2_instance)

# Start IO thread
print("Start IO thread")
kvc2_ext.start_io_thread(kvc2_instance)
print("IO thread started")

# Create CacheInfoInput
test_info = kvc2_ext.CacheInfoInput()
test_info.model_type = kvc2_ext.ModelType.MT_DeepseekV2
test_info.cache_type = kvc2_ext.CacheType.CT_KeyCache
test_info.quant_type = kvc2_ext.QuantType.QT_F32
print("Element size: ", test_info.element_size())

# Generate random test IDs (length = 2560)
torch.manual_seed(123)
length = 2560
test_id = torch.randint(0, 65536, (length,), dtype=torch.uint16).contiguous()
block_count = (length + 255) // 256
# print("Test ID: ", test_id)

# Generate test data based on element size and hidden layer count
element_size = test_info.element_size()
hidden_layer_count = test_info.hidden_layer_count()

def read_cmp_and_release(kvc2_instance, cache_info, ids, length):
    handle = kvc2_ext.lookup(kvc2_instance, cache_info, ids, length)
    if kvc2_ext.is_nullptr(handle):
        print("Handle is nullptr.")
        exit()
    matched_length = kvc2_ext.matched_length(handle)
    matched_data = kvc2_ext.handle_data(handle)
    print('Matched length: ', matched_length)
    if matched_length > 0:
        print(f'First layer address {[hex(x) for x in matched_data[0]]}')
    read_data = get_tensor_from_data_ptr(matched_data, element_size)
    print("Just read check ok.")
    kvc2_ext.release(handle)

l = 128
while l <= length:
    read_cmp_and_release(kvc2_instance, test_info, test_id.data_ptr(), l)
    l += 128

kvc2_ext.destroy_kvc2(kvc2_instance)
print("Test completed successfully.")
```
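`kvc2_utils` is imported by these Python tests but is not shown on this page of the diff. Judging only from how the tests use it, `get_tensor_from_data_ptr` turns the nested lists of raw block addresses returned by `handle_data` into nested lists of `uint8` tensors of `element_size` bytes each. A hedged sketch of such a helper (not the actual `kvc2_utils` implementation) might be:

```python
# Hypothetical sketch, not the repo's kvc2_utils code: wrap each raw address
# from handle_data() in a uint8 tensor of element_size bytes, using the same
# ctypes/frombuffer approach that test_align.py (later in this page) uses.
import ctypes
import torch

def get_tensor_from_data_ptr(matched_data, element_size):
    layers = []
    for layer in matched_data:      # one list of block addresses per layer
        blocks = []
        for addr in layer:          # each entry is a raw data pointer
            buf = (ctypes.c_uint8 * element_size).from_address(addr)
            blocks.append(torch.frombuffer(buf, dtype=torch.uint8))
        layers.append(blocks)
    return layers
```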
csrc/balance_serve/kvc2/test/pytest_mem_prefix_test.py (new file, mode 100644)

```python
import sys

sys.path.append('./build')
sys.path.append('./src')

import torch
import kvc2_ext
from kvc2_utils import alloc_aligned_cache, dealloc_aligned_cache, get_tensor_ptr, get_tensor_from_data_ptr

# Create a kvc2 instance
path = "/mnt/data/kvc2"
kvc2_instance = kvc2_ext.create_kvc2(path, int(10e9))  # 10 GB memory pool

# Start IO thread
print("Start IO thread")
kvc2_ext.start_io_thread(kvc2_instance)
print("IO thread started")

# Create CacheInfoInput
test_info = kvc2_ext.CacheInfoInput()
test_info.model_type = kvc2_ext.ModelType.MT_DeepseekV2
test_info.cache_type = kvc2_ext.CacheType.CT_KeyCache
test_info.quant_type = kvc2_ext.QuantType.QT_F32
print("Element size: ", test_info.element_size())

# Generate random test IDs (length = 2560)
torch.manual_seed(123)
length = 2560
test_id = torch.randint(0, 65536, (length,), dtype=torch.uint16).contiguous()
block_count = (length + 255) // 256
# print("Test ID: ", test_id)

# Generate test data based on element size and hidden layer count
element_size = test_info.element_size()
hidden_layer_count = test_info.hidden_layer_count()
write_data, write_data_mem = alloc_aligned_cache(hidden_layer_count, block_count, element_size)
# print(test_data, test_data_mem)

print('Generate Insert Data')
for layer in write_data:
    for data in layer:
        random_values = torch.randint(0, 256, (element_size,), dtype=torch.uint8)
        data.copy_(random_values)

print('Insert New data')
# Insert raw data
kvc2_ext.raw_insert(kvc2_instance, test_info, test_id.data_ptr(), length, get_tensor_ptr(write_data))

def read_cmp_and_release(kvc2_instance, cache_info, ids, length):
    handle = kvc2_ext.lookup(kvc2_instance, cache_info, ids, length)
    if kvc2_ext.is_nullptr(handle):
        print("Handle is nullptr.")
        exit()
    matched_length = kvc2_ext.matched_length(handle)
    matched_data = kvc2_ext.handle_data(handle)
    print('Matched length: ', matched_length)
    if matched_length > 0:
        print(f'First layer address {[hex(x) for x in matched_data[0]]}')
    read_data = get_tensor_from_data_ptr(matched_data, element_size)
    for layer_w, layer_r in zip(write_data, read_data):
        for data_w, data_r in zip(layer_w, layer_r):
            # print(data_w, data_r)
            assert torch.equal(data_w, data_r)
    print("Lookup read check ok.")
    kvc2_ext.release(handle)

l = 128
while l <= length:
    read_cmp_and_release(kvc2_instance, test_info, test_id.data_ptr(), l)
    l += 128

dealloc_aligned_cache(write_data_mem)
kvc2_ext.save(kvc2_instance)
kvc2_ext.destroy_kvc2(kvc2_instance)
print("Test completed successfully.")
```
csrc/balance_serve/kvc2/test/pytest_mem_read.py (new file, mode 100644)

```python
import sys

sys.path.append('./build')
sys.path.append('./src')

import torch
import kvc2_ext
from kvc2_utils import alloc_aligned_cache, dealloc_aligned_cache, get_tensor_ptr, get_tensor_from_data_ptr

# Create a kvc2 instance
path = "/mnt/data/kvc2"
kvc2_instance = kvc2_ext.create_kvc2(path, int(10e9))  # 10 GB memory pool

# Start IO thread
print("Start IO thread")
kvc2_ext.start_io_thread(kvc2_instance)
print("IO thread started")

# Create CacheInfoInput
test_info = kvc2_ext.CacheInfoInput()
test_info.model_type = kvc2_ext.ModelType.MT_DeepseekV2
test_info.cache_type = kvc2_ext.CacheType.CT_KeyCache
test_info.quant_type = kvc2_ext.QuantType.QT_F32
print("Element size: ", test_info.element_size())

# Generate random test IDs (length = 2560)
length = 2560
test_id = torch.randint(0, 65536, (length,), dtype=torch.uint16).contiguous()
block_count = (length + 255) // 256
# print("Test ID: ", test_id)

# Generate test data based on element size and hidden layer count
element_size = test_info.element_size()
hidden_layer_count = test_info.hidden_layer_count()
write_data, write_data_mem = alloc_aligned_cache(hidden_layer_count, block_count, element_size)
# print(test_data, test_data_mem)

print('Generate Insert Data')
for layer in write_data:
    for data in layer:
        random_values = torch.randint(0, 256, (element_size,), dtype=torch.uint8)
        data.copy_(random_values)

print('Insert New data')
# Insert raw data
kvc2_ext.raw_insert(kvc2_instance, test_info, test_id.data_ptr(), length, get_tensor_ptr(write_data))

handle = kvc2_ext.lookup(kvc2_instance, test_info, test_id.data_ptr(), length)
matched_length = kvc2_ext.matched_length(handle)
matched_data = kvc2_ext.handle_data(handle)
print('Matched length: ', matched_length)
print(f'Match data layer {len(matched_data)}')
print(f'Match layer block count {len(matched_data[0])}')

read_data = get_tensor_from_data_ptr(matched_data, element_size)
for layer_w, layer_r in zip(write_data, read_data):
    for data_w, data_r in zip(layer_w, layer_r):
        # print(data_w, data_r)
        assert torch.equal(data_w, data_r)
print("Lookup read check ok.")

dealloc_aligned_cache(write_data_mem)
kvc2_ext.save(kvc2_instance)
print("Test completed successfully.")
```
csrc/balance_serve/kvc2/test/pytest_raw_insert_and_read.py (new file, mode 100644)

```python
import sys

sys.path.append('./build')
sys.path.append('./src')

import torch
import kvc2_ext
from kvc2_utils import alloc_aligned_cache, dealloc_aligned_cache, get_tensor_ptr

# Create a kvc2 instance
path = "/mnt/data/kvc2"
kvc2_instance = kvc2_ext.create_kvc2(path, int(10e9))  # 10 GB memory pool

# Start IO thread
print("Start IO thread")
kvc2_ext.start_io_thread(kvc2_instance)
print("IO thread started")

# Create CacheInfoInput
test_info = kvc2_ext.CacheInfoInput()
test_info.model_type = kvc2_ext.ModelType.MT_DeepseekV2
test_info.cache_type = kvc2_ext.CacheType.CT_KeyCache
test_info.quant_type = kvc2_ext.QuantType.QT_F32
print("Element size: ", test_info.element_size())

# Generate random test IDs (length = 2560)
length = 2560
test_id = torch.randint(0, 65536, (length,), dtype=torch.uint16).contiguous()
block_count = (length + 255) // 256
# print("Test ID: ", test_id)

# Generate test data based on element size and hidden layer count
element_size = test_info.element_size()
hidden_layer_count = test_info.hidden_layer_count()
write_data, write_data_mem = alloc_aligned_cache(hidden_layer_count, block_count, element_size)
# print(test_data, test_data_mem)

print('Generate Insert Data')
for layer in write_data:
    for data in layer:
        random_values = torch.randint(0, 256, (element_size,), dtype=torch.uint8)
        data.copy_(random_values)

print('Insert New data')
# Insert raw data
kvc2_ext.raw_insert(kvc2_instance, test_info, test_id.data_ptr(), length, get_tensor_ptr(write_data))

read_data, read_data_mem = alloc_aligned_cache(hidden_layer_count, block_count, element_size)
print('Raw read')
matched_length = kvc2_ext.raw_read(kvc2_instance, test_info, test_id.data_ptr(), length, get_tensor_ptr(read_data))
print('Matched length: ', matched_length)

for layer_w, layer_r in zip(write_data, read_data):
    for data_w, data_r in zip(layer_w, layer_r):
        # print(data_w, data_r)
        assert torch.equal(data_w, data_r)
print("Raw read check ok.")

dealloc_aligned_cache(write_data_mem)
dealloc_aligned_cache(read_data_mem)
kvc2_ext.save(kvc2_instance)
print("Test completed successfully.")
```
csrc/balance_serve/kvc2/test/test_align.py (new file, mode 100644)

```python
import ctypes
import torch

def aligned_tensor(size, alignment=4096):
    num_bytes = size
    mem = ctypes.c_void_p()
    error_code = ctypes.CDLL(None).posix_memalign(
        ctypes.byref(mem),
        ctypes.c_size_t(alignment),
        ctypes.c_size_t(num_bytes)
    )
    if error_code != 0:
        raise MemoryError(f"posix_memalign failed with error code {error_code}")

    array_type = (ctypes.c_int8 * size)
    raw_array = array_type.from_address(mem.value)
    tensor = torch.frombuffer(raw_array, dtype=torch.int8)

    if tensor.data_ptr() % alignment != 0:
        raise ValueError(f"Tensor data_ptr {tensor.data_ptr()} is not aligned to {alignment} bytes")

    return tensor, mem

size = 5124380
tensor, mem_ptr = aligned_tensor(size, alignment=4096)
print(f"Tensor: {tensor}, size: {tensor.size()}, dataptr: {tensor.data_ptr()}")
print(f"Tensor memory alignment: {tensor.data_ptr() % 4096 == 0}")
print(f"Allocated memory address: {mem_ptr.value}")
ctypes.CDLL(None).free(mem_ptr)
```
csrc/balance_serve/kvc2/test/test_cuda_stream.cpp (new file, mode 100644)

```cpp
#include <cuda_runtime.h>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <vector>

class CudaStreamManager {
 public:
  CudaStreamManager(int num_streams);
  ~CudaStreamManager();

  // Request structure
  struct Request {
    std::vector<void*> host_mem_addresses;
    std::vector<void*> device_mem_addresses;
    std::vector<size_t> sizes;
    cudaMemcpyKind direction;
    std::function<void()> callback;
  };

  void submitRequest(const Request& request);

 private:
  int num_streams_;
  std::vector<cudaStream_t> streams_;
  int next_stream_index_;
};

CudaStreamManager::CudaStreamManager(int num_streams) : num_streams_(num_streams), next_stream_index_(0) {
  streams_.resize(num_streams_);
  for (int i = 0; i < num_streams_; ++i) {
    cudaError_t err = cudaStreamCreate(&streams_[i]);
    if (err != cudaSuccess) {
      std::cerr << "Failed to create CUDA stream: " << cudaGetErrorString(err) << std::endl;
      for (int j = 0; j < i; ++j) {
        cudaStreamDestroy(streams_[j]);
      }
      throw std::runtime_error("Failed to create CUDA stream");
    }
  }
}

CudaStreamManager::~CudaStreamManager() {
  for (int i = 0; i < num_streams_; ++i) {
    cudaStreamDestroy(streams_[i]);
  }
}

void CudaStreamManager::submitRequest(const Request& request) {
  int stream_index = next_stream_index_;
  cudaStream_t stream = streams_[stream_index];
  next_stream_index_ = (next_stream_index_ + 1) % num_streams_;

  size_t num_transfers = request.host_mem_addresses.size();
  for (size_t i = 0; i < num_transfers; ++i) {
    cudaError_t err = cudaMemcpyAsync(request.device_mem_addresses[i], request.host_mem_addresses[i],
                                      request.sizes[i], request.direction, stream);
    if (err != cudaSuccess) {
      std::cerr << "cudaMemcpyAsync failed: " << cudaGetErrorString(err) << std::endl;
      throw std::runtime_error("cudaMemcpyAsync failed");
    }
  }

  // Enqueue the callback function
  struct CallbackData {
    std::function<void()> callback;
  };
  CallbackData* cb_data = new CallbackData{request.callback};
  cudaError_t err = cudaLaunchHostFunc(
      stream,
      [](void* data) {
        CallbackData* cb_data = static_cast<CallbackData*>(data);
        cb_data->callback();
        delete cb_data;
      },
      cb_data);
  if (err != cudaSuccess) {
    std::cerr << "cudaLaunchHostFunc failed: " << cudaGetErrorString(err) << std::endl;
    throw std::runtime_error("cudaLaunchHostFunc failed");
  }
}

// Example usage
int main() {
  try {
    CudaStreamManager stream_manager(4);  // Create a manager with 4 streams

    // Prepare host and device memory
    const size_t num_pages = 10;
    std::vector<void*> host_mem_addresses(num_pages);
    std::vector<void*> device_mem_addresses(num_pages);
    std::vector<size_t> sizes(num_pages, 4096);  // 4KB pages

    // Allocate host memory
    for (size_t i = 0; i < num_pages; ++i) {
      host_mem_addresses[i] = malloc(4096);
      if (!host_mem_addresses[i]) {
        throw std::runtime_error("Failed to allocate host memory");
      }
      // Initialize data if necessary
    }

    // Allocate device memory
    for (size_t i = 0; i < num_pages; ++i) {
      cudaError_t err = cudaMalloc(&device_mem_addresses[i], 4096);
      if (err != cudaSuccess) {
        std::cerr << "cudaMalloc failed: " << cudaGetErrorString(err) << std::endl;
        throw std::runtime_error("cudaMalloc failed");
      }
    }

    // Create a request
    CudaStreamManager::Request request;
    request.host_mem_addresses = host_mem_addresses;
    request.device_mem_addresses = device_mem_addresses;
    request.sizes = sizes;
    request.direction = cudaMemcpyHostToDevice;
    request.callback = []() { std::cout << "Data transfer completed!" << std::endl; };

    // Submit the request
    stream_manager.submitRequest(request);

    // Wait for all streams to complete
    cudaError_t err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
      std::cerr << "cudaDeviceSynchronize failed: " << cudaGetErrorString(err) << std::endl;
      throw std::runtime_error("cudaDeviceSynchronize failed");
    }

    // Clean up
    for (size_t i = 0; i < num_pages; ++i) {
      free(host_mem_addresses[i]);
      cudaFree(device_mem_addresses[i]);
    }
  } catch (const std::exception& e) {
    std::cerr << "Exception: " << e.what() << std::endl;
    return 1;
  }
  return 0;
}
```
csrc/balance_serve/kvc2/test/test_cuda_stream_manager.cpp (new file, mode 100644)

```cpp
#include "cuda_stream_manager.hh"
#include <cuda_runtime.h>
#include <chrono>
#include <functional>
#include <iostream>
#include <stdexcept>
#include <thread>
#include <vector>

int main() {
  try {
    int num_devices = 0;
    cudaError_t err = cudaGetDeviceCount(&num_devices);
    if (err != cudaSuccess) {
      std::cerr << "cudaGetDeviceCount failed: " << cudaGetErrorString(err) << std::endl;
      return 1;
    }
    if (num_devices < 1) {
      std::cerr << "No CUDA devices found." << std::endl;
      return 1;
    }
    std::vector<size_t> device_ids;
    for (int i = 0; i < num_devices; ++i) {
      device_ids.push_back(i);
    }

    const size_t num_pages = 10;
    const size_t page_size = 4096;  // 4 KB per page

    // Create a CudaStreamManager instance that manages all devices
    CudaStreamManager stream_manager(device_ids, 4);

    // Prepare host memory and per-device memory mappings
    std::vector<std::vector<void*>> host_mem_addresses(num_devices);
    std::vector<std::vector<void*>> device_mem_addresses(num_devices);

    // Allocate host memory
    for (size_t i = 0; i < num_pages; ++i) {
      void* host_ptr = malloc(page_size);
      if (!host_ptr) {
        throw std::runtime_error("Failed to allocate host memory");
      }
      // Initialize data here if necessary
      // Add the same host memory to every device's list
      for (int device_id = 0; device_id < num_devices; ++device_id) {
        host_mem_addresses[device_id].push_back(host_ptr);
      }
    }

    // Allocate device memory on each device
    for (int device_id = 0; device_id < num_devices; ++device_id) {
      err = cudaSetDevice(device_id);
      if (err != cudaSuccess) {
        std::cerr << "cudaSetDevice failed: " << cudaGetErrorString(err) << std::endl;
        throw std::runtime_error("cudaSetDevice failed");
      }
      for (size_t i = 0; i < num_pages; ++i) {
        void* device_ptr;
        err = cudaMalloc(&device_ptr, page_size);
        if (err != cudaSuccess) {
          std::cerr << "cudaMalloc failed on device " << device_id << ": " << cudaGetErrorString(err) << std::endl;
          throw std::runtime_error("cudaMalloc failed");
        }
        device_mem_addresses[device_id].push_back(device_ptr);
      }
    }

    // Create and submit a request for each device
    for (int device_id = 0; device_id < num_devices; ++device_id) {
      auto request = std::shared_ptr<CudaStreamManager::Request>(new CudaStreamManager::Request);
      request->device_id = device_id;
      request->host_mem_addresses = host_mem_addresses[device_id];
      request->device_mem_addresses = device_mem_addresses[device_id];
      request->sizes = std::vector<size_t>(num_pages, page_size);
      request->direction = cudaMemcpyHostToDevice;
      request->callback = [device_id]() {
        std::cout << "Device " << device_id << " data transfer completed!" << std::endl;
      };
      stream_manager.submitRequest(request);
    }

    // Sleep for a while so all requests get processed;
    // a real application should use a proper synchronization mechanism.
    std::this_thread::sleep_for(std::chrono::seconds(5));

    // Clean up host memory
    for (size_t i = 0; i < num_pages; ++i) {
      // All devices share the same host memory, so free it only once
      free(host_mem_addresses[0][i]);
    }

    // Clean up device memory
    for (int device_id = 0; device_id < num_devices; ++device_id) {
      err = cudaSetDevice(device_id);
      if (err != cudaSuccess) {
        std::cerr << "cudaSetDevice failed during cleanup: " << cudaGetErrorString(err) << std::endl;
        continue;
      }
      for (void* ptr : device_mem_addresses[device_id]) {
        cudaFree(ptr);
      }
    }
  } catch (const std::exception& e) {
    std::cerr << "Exception: " << e.what() << std::endl;
    return 1;
  }
  return 0;
}
```