Commit e6d61fc6 authored by Shangyan Zhou
Browse files

Update some dispatch configs.

parent a2fa3b73
...@@ -203,9 +203,9 @@ class Buffer: ...@@ -203,9 +203,9 @@ class Buffer:
2: Config(Buffer.num_sms, 24, 256, 6, 128), 2: Config(Buffer.num_sms, 24, 256, 6, 128),
4: Config(Buffer.num_sms, 6, 256, 6, 128), 4: Config(Buffer.num_sms, 6, 256, 6, 128),
8: Config(Buffer.num_sms, 6, 256, 6, 128), 8: Config(Buffer.num_sms, 6, 256, 6, 128),
16: Config(Buffer.num_sms, 16, 288, 20, 128), 16: Config(Buffer.num_sms, 36, 288, 20, 128),
24: Config(Buffer.num_sms, 8, 288, 32, 128), 24: Config(Buffer.num_sms, 8, 288, 32, 128),
32: Config(Buffer.num_sms, 8, 288, 32, 128), 32: Config(Buffer.num_sms, 32, 288, 32, 128),
64: Config(Buffer.num_sms, 20, 288, 28, 128), 64: Config(Buffer.num_sms, 20, 288, 28, 128),
128: Config(Buffer.num_sms, 20, 560, 32, 128), 128: Config(Buffer.num_sms, 20, 560, 32, 128),
144: Config(Buffer.num_sms, 32, 720, 12, 128), 144: Config(Buffer.num_sms, 32, 720, 12, 128),
......
...@@ -181,7 +181,7 @@ def test_main(args: argparse.Namespace, num_sms: int, ...@@ -181,7 +181,7 @@ def test_main(args: argparse.Namespace, num_sms: int,
best_time, best_results = 1e10, None best_time, best_results = 1e10, None
rdma_send_bytes = (dispatch_bf16_rdma_send_bytes * fp8_factor) if isinstance(current_x, tuple) else dispatch_bf16_rdma_send_bytes rdma_send_bytes = (dispatch_bf16_rdma_send_bytes * fp8_factor) if isinstance(current_x, tuple) else dispatch_bf16_rdma_send_bytes
nvl_recv_bytes = (dispatch_bf16_nvl_recv_bytes * fp8_factor) if isinstance(current_x, tuple) else dispatch_bf16_nvl_recv_bytes nvl_recv_bytes = (dispatch_bf16_nvl_recv_bytes * fp8_factor) if isinstance(current_x, tuple) else dispatch_bf16_nvl_recv_bytes
for nvl_chunk_size in range(4, 33, 4): for nvl_chunk_size in range(4, 45, 4):
for rdma_chunk_size in range(4, 33, 4): for rdma_chunk_size in range(4, 33, 4):
config = deep_ep.Config(num_sms, nvl_chunk_size, nvl_buffer_size, rdma_chunk_size, rdma_buffer_size) config = deep_ep.Config(num_sms, nvl_chunk_size, nvl_buffer_size, rdma_chunk_size, rdma_buffer_size)
tune_args = {'x': current_x, 'handle': handle, 'config': config} tune_args = {'x': current_x, 'handle': handle, 'config': config}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment