Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
DeepEP
Commits
3e54b78f
Commit
3e54b78f
authored
Apr 22, 2025
by
Shangyan Zhou
Browse files
Normal kernels always use IBGDA mode.
parent
20b2aaaf
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
16 additions
and
21 deletions
+16
-21
csrc/kernels/runtime.cu
csrc/kernels/runtime.cu
+6
-9
deep_ep/buffer.py
deep_ep/buffer.py
+10
-12
No files found.
csrc/kernels/runtime.cu
View file @
3e54b78f
...
@@ -58,15 +58,12 @@ int init(const std::vector<uint8_t> &root_unique_id_val, int rank, int num_ranks
...
@@ -58,15 +58,12 @@ int init(const std::vector<uint8_t> &root_unique_id_val, int rank, int num_ranks
EP_HOST_ASSERT
(
cpu_rdma_team
!=
NVSHMEM_TEAM_INVALID
);
EP_HOST_ASSERT
(
cpu_rdma_team
!=
NVSHMEM_TEAM_INVALID
);
}
}
// Normal operations use IBRC, while low-latency operations use IBGDA
// TODO: we still use `nvshmem_barrier` under IBRC mode, which should be switched to IBGDA mode later
bool
internode_use_ibgda
=
true
;
if
(
low_latency_mode
or
internode_use_ibgda
)
{
nvshmemi_device_host_state_t
*
dev_state_ptr
=
nullptr
;
nvshmemi_device_host_state_t
*
dev_state_ptr
=
nullptr
;
CUDA_CHECK
(
cudaGetSymbolAddress
(
reinterpret_cast
<
void
**>
(
&
dev_state_ptr
),
nvshmemi_device_state_d
));
CUDA_CHECK
(
cudaGetSymbolAddress
(
reinterpret_cast
<
void
**>
(
&
dev_state_ptr
),
nvshmemi_device_state_d
));
bool
ibgda_is_initialized
=
false
;
bool
ibgda_is_initialized
=
false
;
CUDA_CHECK
(
cudaMemcpy
(
&
dev_state_ptr
->
ibgda_is_initialized
,
&
ibgda_is_initialized
,
sizeof
(
bool
),
cudaMemcpyHostToDevice
));
CUDA_CHECK
(
cudaMemcpy
(
&
dev_state_ptr
->
ibgda_is_initialized
,
&
ibgda_is_initialized
,
sizeof
(
bool
),
cudaMemcpyHostToDevice
));
}
nvshmem_barrier_all
();
nvshmem_barrier_all
();
return
nvshmem_my_pe
();
return
nvshmem_my_pe
();
}
}
...
...
deep_ep/buffer.py
View file @
3e54b78f
...
@@ -65,10 +65,8 @@ class Buffer:
...
@@ -65,10 +65,8 @@ class Buffer:
# Synchronize NVSHMEM unique IDs
# Synchronize NVSHMEM unique IDs
root_unique_id
=
None
root_unique_id
=
None
internode_use_ibgda
=
True
if
self
.
runtime
.
get_num_rdma_ranks
()
>
1
or
low_latency_mode
:
if
self
.
runtime
.
get_num_rdma_ranks
()
>
1
or
low_latency_mode
:
# Enable IBGDA for the low-latency mode, which refers to "no packet forwarding between NVLink and RDMA"
# Enable IBGDA
if
low_latency_mode
or
internode_use_ibgda
:
assert
num_qps_per_rank
>
0
assert
num_qps_per_rank
>
0
os
.
environ
[
'NVSHMEM_DISABLE_P2P'
]
=
'1'
os
.
environ
[
'NVSHMEM_DISABLE_P2P'
]
=
'1'
os
.
environ
[
'NVSHMEM_IB_ENABLE_IBGDA'
]
=
'1'
os
.
environ
[
'NVSHMEM_IB_ENABLE_IBGDA'
]
=
'1'
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment