"openmmapi/src/PythonForce.cpp" did not exist on "aa96846eb06fc74182eca10d94313d4ca6adfbae"
Commit d24bbeba authored by alpha-baby's avatar alpha-baby
Browse files

opt code

parent b90320e2
...@@ -125,7 +125,7 @@ notify_dispatch(const int* num_tokens_per_rank, int* moe_recv_counter_mapped, in ...@@ -125,7 +125,7 @@ notify_dispatch(const int* num_tokens_per_rank, int* moe_recv_counter_mapped, in
// Issue send // Issue send
// TODO: more light fence or barrier or signaling // TODO: more light fence or barrier or signaling
// TODO: overlap EP barrier and NVL cleaning // TODO: overlap EP barrier and NVL cleaning
for (int i = warp_id; i < kNumRDMARanks; i+=num_threads/32) { for (int i = warp_id; i < kNumRDMARanks; i += num_warps) {
if (i != rdma_rank) { if (i != rdma_rank) {
nvshmemi_ibgda_put_nbi_warp<true>(reinterpret_cast<uint64_t>(rdma_recv_num_tokens_mixed.recv_buffer(rdma_rank)), nvshmemi_ibgda_put_nbi_warp<true>(reinterpret_cast<uint64_t>(rdma_recv_num_tokens_mixed.recv_buffer(rdma_rank)),
reinterpret_cast<uint64_t>(rdma_recv_num_tokens_mixed.send_buffer(i)), reinterpret_cast<uint64_t>(rdma_recv_num_tokens_mixed.send_buffer(i)),
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment