Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
jerrrrry
infinilm
Commits
144ba492
Commit
144ba492
authored
Jan 30, 2026
by
PanZezhong
Committed by
wooway777
Feb 10, 2026
Browse files
issue/143 fix bench script, worker cleanup, compiler initial input
parent
69f18760
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
58 additions
and
14 deletions
+58
-14
csrc/engine/compiler/paged_compiler.cpp
csrc/engine/compiler/paged_compiler.cpp
+14
-0
csrc/engine/rank_worker.cpp
csrc/engine/rank_worker.cpp
+23
-14
examples/bench.py
examples/bench.py
+21
-0
No files found.
csrc/engine/compiler/paged_compiler.cpp
View file @
144ba492
#include "paged_compiler.hpp"

namespace {

/// @brief Zero-fill a device tensor by staging a host-side buffer of zeros
///        and copying it to the device.
/// @param tensor Tensor whose entire byte range (`tensor->nbytes()`) is
///        overwritten with zeros. Passed by reference; the handle itself is
///        not reassigned, only the device memory it points at is mutated.
/// @note The final `false` argument to memcpyH2D presumably selects a
///       synchronous (non-async) copy — TODO confirm against the
///       infinicore::context API.
// Todo: replace with Tensor::zeros when it is available
inline void set_zeros(infinicore::Tensor &tensor) {
    // Host staging buffer, value-initialized to zero by the fill constructor.
    std::vector<uint8_t> zeros(tensor->nbytes(), 0);
    infinicore::context::memcpyH2D(tensor->data(), zeros.data(), tensor->nbytes(), false);
}

} // namespace
namespace
infinilm
::
engine
{
namespace
infinilm
::
engine
{
PagedCompiler
::
PagedCompiler
(
const
std
::
shared_ptr
<
InfinilmModel
>
&
model
,
RankBarrier
*
barrier
)
PagedCompiler
::
PagedCompiler
(
const
std
::
shared_ptr
<
InfinilmModel
>
&
model
,
RankBarrier
*
barrier
)
:
GraphCompiler
(
model
,
barrier
)
{
:
GraphCompiler
(
model
,
barrier
)
{
...
@@ -27,15 +35,20 @@ void PagedCompiler::compile() {
...
@@ -27,15 +35,20 @@ void PagedCompiler::compile() {
compiled_map_decode_
.
clear
();
compiled_map_decode_
.
clear
();
block_tables_holder_
=
infinicore
::
Tensor
::
empty
(
block_tables_holder_
=
infinicore
::
Tensor
::
empty
(
{
nblocks
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
{
nblocks
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
set_zeros
(
block_tables_holder_
);
for
(
size_t
b
:
decode_batch_sizes_
)
{
for
(
size_t
b
:
decode_batch_sizes_
)
{
size_t
block_per_req
=
nblocks
/
b
;
size_t
block_per_req
=
nblocks
/
b
;
InfinilmModel
::
Input
input
;
InfinilmModel
::
Input
input
;
input
.
input_ids
=
infinicore
::
Tensor
::
empty
({
1
,
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
input_ids
=
infinicore
::
Tensor
::
empty
({
1
,
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
position_ids
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
position_ids
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
total_sequence_lengths
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
total_sequence_lengths
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
set_zeros
(
input
.
input_ids
.
value
());
set_zeros
(
input
.
position_ids
.
value
());
set_zeros
(
input
.
total_sequence_lengths
.
value
());
std
::
vector
<
int64_t
>
total_sequence_lengths_vec
(
b
,
1
);
std
::
vector
<
int64_t
>
total_sequence_lengths_vec
(
b
,
1
);
infinicore
::
context
::
memcpyH2D
(
input
.
total_sequence_lengths
.
value
()
->
data
(),
total_sequence_lengths_vec
.
data
(),
b
*
sizeof
(
int64_t
),
false
);
infinicore
::
context
::
memcpyH2D
(
input
.
total_sequence_lengths
.
value
()
->
data
(),
total_sequence_lengths_vec
.
data
(),
b
*
sizeof
(
int64_t
),
false
);
input
.
input_offsets
=
infinicore
::
Tensor
::
empty
({
b
+
1
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
input_offsets
=
infinicore
::
Tensor
::
empty
({
b
+
1
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
set_zeros
(
input
.
input_offsets
.
value
());
std
::
vector
<
int64_t
>
input_offsets_vec
(
b
+
1
,
0
);
std
::
vector
<
int64_t
>
input_offsets_vec
(
b
+
1
,
0
);
for
(
size_t
i
=
0
;
i
<=
b
;
i
++
)
{
for
(
size_t
i
=
0
;
i
<=
b
;
i
++
)
{
input_offsets_vec
[
i
]
=
i
;
input_offsets_vec
[
i
]
=
i
;
...
@@ -43,6 +56,7 @@ void PagedCompiler::compile() {
...
@@ -43,6 +56,7 @@ void PagedCompiler::compile() {
infinicore
::
context
::
memcpyH2D
(
input
.
input_offsets
.
value
()
->
data
(),
input_offsets_vec
.
data
(),
(
b
+
1
)
*
sizeof
(
int64_t
),
false
);
infinicore
::
context
::
memcpyH2D
(
input
.
input_offsets
.
value
()
->
data
(),
input_offsets_vec
.
data
(),
(
b
+
1
)
*
sizeof
(
int64_t
),
false
);
input
.
block_tables
=
block_tables_holder_
->
as_strided
({
b
,
block_per_req
},
{(
ptrdiff_t
)
block_per_req
,
1
});
input
.
block_tables
=
block_tables_holder_
->
as_strided
({
b
,
block_per_req
},
{(
ptrdiff_t
)
block_per_req
,
1
});
input
.
slot_mapping
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
input
.
slot_mapping
=
infinicore
::
Tensor
::
empty
({
b
},
infinicore
::
DataType
::
I64
,
infinicore
::
context
::
getDevice
());
set_zeros
(
input
.
slot_mapping
.
value
());
barrier_
->
wait
();
barrier_
->
wait
();
infinicore
::
context
::
startGraphRecording
();
infinicore
::
context
::
startGraphRecording
();
...
...
csrc/engine/rank_worker.cpp
View file @
144ba492
...
@@ -246,12 +246,12 @@ void RankWorker::thread_loop() {
...
@@ -246,12 +246,12 @@ void RankWorker::thread_loop() {
try
{
try
{
model_
->
load_parameter
(
local_param_name
,
local_param
);
model_
->
load_parameter
(
local_param_name
,
local_param
);
}
catch
(
const
std
::
exception
&
e
)
{
}
catch
(
const
std
::
exception
&
e
)
{
// convert exceptions to a safe behavior: set should_exit_ and notify caller
{
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
should_exit_
=
true
;
should_exit_
=
true
;
job_done_
=
true
;
job_done_
=
true
;
}
cv_
.
notify_all
();
cv_
.
notify_all
();
// rethrow so the thread can be joined and caller sees an error if desired (optional)
spdlog
::
error
(
"[{}] exception during load_parameter_: {}
\n
"
,
info
(),
e
.
what
());
spdlog
::
error
(
"[{}] exception during load_parameter_: {}
\n
"
,
info
(),
e
.
what
());
break
;
break
;
}
}
...
@@ -321,9 +321,11 @@ void RankWorker::thread_loop() {
...
@@ -321,9 +321,11 @@ void RankWorker::thread_loop() {
cv_
.
notify_all
();
cv_
.
notify_all
();
}
catch
(
const
std
::
exception
&
e
)
{
}
catch
(
const
std
::
exception
&
e
)
{
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
{
should_exit_
=
true
;
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
job_done_
=
true
;
should_exit_
=
true
;
job_done_
=
true
;
}
cv_
.
notify_all
();
cv_
.
notify_all
();
spdlog
::
error
(
"[{}] exception during forward: {}
\n
"
,
info
(),
e
.
what
());
spdlog
::
error
(
"[{}] exception during forward: {}
\n
"
,
info
(),
e
.
what
());
break
;
break
;
...
@@ -338,9 +340,11 @@ void RankWorker::thread_loop() {
...
@@ -338,9 +340,11 @@ void RankWorker::thread_loop() {
cv_
.
notify_all
();
cv_
.
notify_all
();
}
catch
(
const
std
::
exception
&
e
)
{
}
catch
(
const
std
::
exception
&
e
)
{
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
{
should_exit_
=
true
;
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
job_done_
=
true
;
should_exit_
=
true
;
job_done_
=
true
;
}
cv_
.
notify_all
();
cv_
.
notify_all
();
spdlog
::
error
(
"[{}] exception during reset_cache: {}
\n
"
,
info
(),
e
.
what
());
spdlog
::
error
(
"[{}] exception during reset_cache: {}
\n
"
,
info
(),
e
.
what
());
break
;
break
;
...
@@ -357,9 +361,11 @@ void RankWorker::thread_loop() {
...
@@ -357,9 +361,11 @@ void RankWorker::thread_loop() {
cv_
.
notify_all
();
cv_
.
notify_all
();
}
catch
(
const
std
::
exception
&
e
)
{
}
catch
(
const
std
::
exception
&
e
)
{
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
{
should_exit_
=
true
;
std
::
lock_guard
<
std
::
mutex
>
lk
(
mutex_
);
job_done_
=
true
;
should_exit_
=
true
;
job_done_
=
true
;
}
cv_
.
notify_all
();
cv_
.
notify_all
();
spdlog
::
error
(
"[{}] exception during compile: {}
\n
"
,
info
(),
e
.
what
());
spdlog
::
error
(
"[{}] exception during compile: {}
\n
"
,
info
(),
e
.
what
());
break
;
break
;
...
@@ -369,6 +375,9 @@ void RankWorker::thread_loop() {
...
@@ -369,6 +375,9 @@ void RankWorker::thread_loop() {
// Shouldn't reach here (no-op)
// Shouldn't reach here (no-op)
}
}
}
// while
}
// while
// Some clean up should be done before exiting the thread
compiler_
.
reset
();
}
catch
(
const
std
::
exception
&
e
)
{
}
catch
(
const
std
::
exception
&
e
)
{
// Top-level exception: ensure any waiters are woken and the thread exits cleanly.
// Top-level exception: ensure any waiters are woken and the thread exits cleanly.
{
{
...
...
examples/bench.py
View file @
144ba492
...
@@ -137,6 +137,21 @@ def get_args():
...
@@ -137,6 +137,21 @@ def get_args():
action
=
"store_true"
,
action
=
"store_true"
,
help
=
"Run nvidia test"
,
help
=
"Run nvidia test"
,
)
)
parser
.
add_argument
(
"--metax"
,
action
=
"store_true"
,
help
=
"Run metax test"
,
)
parser
.
add_argument
(
"--moore"
,
action
=
"store_true"
,
help
=
"Run moore test"
,
)
parser
.
add_argument
(
"--iluvatar"
,
action
=
"store_true"
,
help
=
"Run iluvatar test"
,
)
parser
.
add_argument
(
parser
.
add_argument
(
"--cambricon"
,
"--cambricon"
,
action
=
"store_true"
,
action
=
"store_true"
,
...
@@ -328,6 +343,12 @@ if __name__ == "__main__":
...
@@ -328,6 +343,12 @@ if __name__ == "__main__":
device_str
=
"cpu"
device_str
=
"cpu"
elif
args
.
nvidia
:
elif
args
.
nvidia
:
device_str
=
"cuda"
device_str
=
"cuda"
elif
args
.
metax
:
device_str
=
"cuda"
elif
args
.
moore
:
device_str
=
"musa"
elif
args
.
iluvatar
:
device_str
=
"cuda"
elif
args
.
cambricon
:
elif
args
.
cambricon
:
device_str
=
"mlu"
device_str
=
"mlu"
else
:
else
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment