Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
vllm_cscc
Commits
7cd288a4
Unverified
Commit
7cd288a4
authored
Dec 24, 2025
by
skaraban3807
Committed by
GitHub
Dec 24, 2025
Browse files
[PERF] Add interleaved memory allocation to NUMA module (#30800)
parent
d2018073
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
38 additions
and
13 deletions
+38
-13
csrc/cpu/utils.cpp
csrc/cpu/utils.cpp
+38
-13
No files found.
csrc/cpu/utils.cpp
View file @
7cd288a4
...
@@ -24,6 +24,8 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
...
@@ -24,6 +24,8 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
#ifndef VLLM_NUMA_DISABLED
#ifndef VLLM_NUMA_DISABLED
std
::
string
init_cpu_threads_env
(
const
std
::
string
&
cpu_ids
)
{
std
::
string
init_cpu_threads_env
(
const
std
::
string
&
cpu_ids
)
{
bitmask
*
omp_cpu_mask
=
numa_parse_cpustring_all
(
cpu_ids
.
c_str
());
bitmask
*
omp_cpu_mask
=
numa_parse_cpustring_all
(
cpu_ids
.
c_str
());
TORCH_CHECK
(
omp_cpu_mask
!=
nullptr
,
"Failed to parse CPU string: "
+
cpu_ids
);
TORCH_CHECK
(
omp_cpu_mask
->
size
>
0
);
TORCH_CHECK
(
omp_cpu_mask
->
size
>
0
);
std
::
vector
<
int
>
omp_cpu_ids
;
std
::
vector
<
int
>
omp_cpu_ids
;
omp_cpu_ids
.
reserve
(
omp_cpu_mask
->
size
);
omp_cpu_ids
.
reserve
(
omp_cpu_mask
->
size
);
...
@@ -44,20 +46,12 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
...
@@ -44,20 +46,12 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
// Memory node binding
// Memory node binding
if
(
numa_available
()
!=
-
1
)
{
if
(
numa_available
()
!=
-
1
)
{
int
mem_node_id
=
numa_node_of_cpu
(
omp_cpu_ids
.
front
());
std
::
set
<
int
>
node_ids
;
std
::
set
<
int
>
node_ids
;
for
(
const
auto
&
cpu_id
:
omp_cpu_ids
)
{
for
(
const
auto
&
cpu_id
:
omp_cpu_ids
)
{
int
node_id
=
numa_node_of_cpu
(
cpu_id
);
int
node_id
=
numa_node_of_cpu
(
cpu_id
);
if
(
node_id
!=
-
1
)
{
if
(
node_id
!=
-
1
)
{
node_ids
.
insert
(
node_id
);
node_ids
.
insert
(
node_id
);
}
}
if
(
node_id
!=
mem_node_id
)
{
TORCH_WARN
(
"CPU "
,
cpu_id
,
" is on NUMA node "
,
node_id
,
", but CPU "
,
omp_cpu_ids
.
front
(),
" is on NUMA node "
,
mem_node_id
,
". All CPUs should be on the same NUMA node for optimal "
"performance. Memory will be bound to NUMA node "
,
mem_node_id
,
"."
);
}
}
}
// Concatenate all node_ids into a single comma-separated string
// Concatenate all node_ids into a single comma-separated string
if
(
!
node_ids
.
empty
())
{
if
(
!
node_ids
.
empty
())
{
...
@@ -70,7 +64,7 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
...
@@ -70,7 +64,7 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
}
}
bitmask
*
mask
=
numa_parse_nodestring
(
node_ids_str
.
c_str
());
bitmask
*
mask
=
numa_parse_nodestring
(
node_ids_str
.
c_str
());
bitmask
*
src_mask
=
numa_get_membind
();
bitmask
*
src_mask
=
numa_get_mems_allowed
();
int
pid
=
getpid
();
int
pid
=
getpid
();
...
@@ -83,14 +77,45 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
...
@@ -83,14 +77,45 @@ std::string init_cpu_threads_env(const std::string& cpu_ids) {
std
::
to_string
(
errno
));
std
::
to_string
(
errno
));
}
}
// restrict memory allocation node.
// Restrict memory allocation to the selected NUMA node(s).
// Enhances memory locality for the threads bound to those NUMA CPUs.
if
(
node_ids
.
size
()
>
1
)
{
errno
=
0
;
numa_set_interleave_mask
(
mask
);
if
(
errno
!=
0
)
{
TORCH_WARN
(
"numa_set_interleave_mask failed. errno: "
+
std
::
to_string
(
errno
));
}
else
{
TORCH_WARN
(
"NUMA binding: Using INTERLEAVE policy for memory "
"allocation across multiple NUMA nodes (nodes: "
+
node_ids_str
+
"). Memory allocations will be "
"interleaved across the specified NUMA nodes."
);
}
}
else
{
errno
=
0
;
numa_set_membind
(
mask
);
numa_set_membind
(
mask
);
if
(
errno
!=
0
)
{
TORCH_WARN
(
"numa_set_membind failed. errno: "
+
std
::
to_string
(
errno
));
}
else
{
TORCH_WARN
(
"NUMA binding: Using MEMBIND policy for memory "
"allocation on the NUMA nodes ("
+
node_ids_str
+
"). Memory allocations will be "
"strictly bound to these NUMA nodes."
);
}
}
numa_set_strict
(
1
);
numa_set_strict
(
1
);
numa_free_nodemask
(
mask
);
numa_free_nodemask
(
mask
);
numa_free_nodemask
(
src_mask
);
numa_free_nodemask
(
src_mask
);
}
else
{
}
else
{
TORCH_WARN
(
"numa_parse_nodestring or numa_get_membind failed. errno: "
+
TORCH_WARN
(
"numa_parse_nodestring or numa_get_run_node_mask failed. errno: "
+
std
::
to_string
(
errno
));
std
::
to_string
(
errno
));
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment