Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
DeepEP
Commits
1a35d640
Commit
1a35d640
authored
May 21, 2026
by
root
Browse files
fix dtk26.04 4nodes core dump.
Signed-off-by:
root
<
root@host-10-212-17-3.cluster.local
>
parent
95e46992
Changes
3
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
936 additions
and
952 deletions
+936
-952
csrc/config.hpp
csrc/config.hpp
+2
-2
csrc/deep_ep.cu
csrc/deep_ep.cu
+4
-4
csrc/kernels/internode.cu
csrc/kernels/internode.cu
+930
-946
No files found.
csrc/config.hpp
View file @
1a35d640
...
@@ -47,7 +47,7 @@ struct Config {
...
@@ -47,7 +47,7 @@ struct Config {
EP_HOST_ASSERT
(
num_ranks
<=
NUM_MAX_NVL_PEERS
or
num_sms
%
(
2
*
NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL
)
==
0
);
EP_HOST_ASSERT
(
num_ranks
<=
NUM_MAX_NVL_PEERS
or
num_sms
%
(
2
*
NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL
)
==
0
);
const
auto
num_rdma_ranks
=
std
::
max
(
num_ranks
/
NUM_MAX_NVL_PEERS
,
1
);
const
auto
num_rdma_ranks
=
std
::
max
(
num_ranks
/
NUM_MAX_NVL_PEERS
,
1
);
const
auto
num_nvl_ranks
=
std
::
min
(
num_ranks
,
NUM_MAX_NVL_PEERS
);
const
auto
num_nvl_ranks
=
std
::
min
(
num_ranks
,
NUM_MAX_NVL_PEERS
);
const
int
num_channels
=
num_
ranks
<=
8
?
num_sms
/
2
:
num_sms
/
NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL
;
const
int
num_channels
=
num_
sms
/
2
;
// 计算每个nvl通信数据包的数据量
// 计算每个nvl通信数据包的数据量
size_t
num_single_nvl_bag_bytes
=
size_t
num_single_nvl_bag_bytes
=
...
@@ -83,7 +83,7 @@ struct Config {
...
@@ -83,7 +83,7 @@ struct Config {
EP_HOST_ASSERT
(
num_ranks
%
NUM_MAX_NVL_PEERS
==
0
);
EP_HOST_ASSERT
(
num_ranks
%
NUM_MAX_NVL_PEERS
==
0
);
EP_HOST_ASSERT
(
num_sms
%
NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL
==
0
);
EP_HOST_ASSERT
(
num_sms
%
NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL
==
0
);
const
int
num_rdma_ranks
=
num_ranks
/
NUM_MAX_NVL_PEERS
;
const
int
num_rdma_ranks
=
num_ranks
/
NUM_MAX_NVL_PEERS
;
const
int
num_channels
=
num_sms
/
NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL
;
const
int
num_channels
=
num_sms
/
2
;
// 计算每个rdma通信数据包的数据量
// 计算每个rdma通信数据包的数据量
size_t
num_single_rdma_bag_bytes
=
size_t
num_single_rdma_bag_bytes
=
...
...
csrc/deep_ep.cu
View file @
1a35d640
...
@@ -809,8 +809,8 @@ Buffer::internode_dispatch(const torch::Tensor &x, const std::optional<torch::Te
...
@@ -809,8 +809,8 @@ Buffer::internode_dispatch(const torch::Tensor &x, const std::optional<torch::Te
// here.
// here.
pybind11
::
gil_scoped_release
release
;
pybind11
::
gil_scoped_release
release
;
const
int
num_channels
=
config
.
num_sms
/
NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL
;
const
int
num_channels
=
config
.
num_sms
/
2
;
EP_HOST_ASSERT
(
config
.
num_sms
%
NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL
==
0
);
//
EP_HOST_ASSERT(config.num_sms % NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL == 0);
EP_HOST_ASSERT
(
0
<
get_num_rdma_ranks
()
and
get_num_rdma_ranks
()
<=
NUM_MAX_RDMA_PEERS
);
EP_HOST_ASSERT
(
0
<
get_num_rdma_ranks
()
and
get_num_rdma_ranks
()
<=
NUM_MAX_RDMA_PEERS
);
bool
cached_mode
=
cached_rdma_channel_prefix_matrix
.
has_value
();
bool
cached_mode
=
cached_rdma_channel_prefix_matrix
.
has_value
();
...
@@ -1130,8 +1130,8 @@ Buffer::internode_combine(
...
@@ -1130,8 +1130,8 @@ Buffer::internode_combine(
const
torch
::
Tensor
&
combined_nvl_head
,
const
Config
&
config
,
const
torch
::
Tensor
&
combined_nvl_head
,
const
Config
&
config
,
std
::
optional
<
EventHandle
>
&
previous_event
,
bool
async
,
bool
allocate_on_comm_stream
)
{
std
::
optional
<
EventHandle
>
&
previous_event
,
bool
async
,
bool
allocate_on_comm_stream
)
{
#ifndef DISABLE_ROCSHMEM
#ifndef DISABLE_ROCSHMEM
const
int
num_channels
=
config
.
num_sms
/
NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL
;
const
int
num_channels
=
config
.
num_sms
/
2
;
EP_HOST_ASSERT
(
config
.
num_sms
%
NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL
==
0
);
//
EP_HOST_ASSERT(config.num_sms % NUM_INTERNODE_DISPATCH_BLOCKS_PER_CHANNEL == 0);
// Shape and contiguous checks
// Shape and contiguous checks
EP_HOST_ASSERT
(
x
.
dim
()
==
2
and
x
.
is_contiguous
());
EP_HOST_ASSERT
(
x
.
dim
()
==
2
and
x
.
is_contiguous
());
...
...
csrc/kernels/internode.cu
View file @
1a35d640
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment