Commit bdd119f8 authored by Shangyan Zhou
Browse files

Update combine config

parent d72817eb
......@@ -247,7 +247,7 @@ class Buffer:
2: Config(Buffer.num_sms, 10, 256, 6, 128),
4: Config(Buffer.num_sms, 9, 256, 6, 128),
8: Config(Buffer.num_sms, 4, 256, 6, 128),
16: Config(Buffer.num_sms, 4, 288, 16, 128),
16: Config(Buffer.num_sms, 4, 288, 12, 128),
24: Config(Buffer.num_sms, 1, 288, 8, 128),
32: Config(Buffer.num_sms, 1, 288, 8, 128),
64: Config(Buffer.num_sms, 1, 288, 20, 128),
......
......@@ -209,8 +209,8 @@ def test_main(args: argparse.Namespace, num_sms: int,
# Tune combine performance
best_time, best_results = 1e10, None
for nvl_chunk_size in range(1, 13, 1):
for rdma_chunk_size in range(8, 33, 4):
for nvl_chunk_size in range(1, 8, 1):
for rdma_chunk_size in range(12 if num_nodes == 2 else 8, 33, 4):
config = deep_ep.Config(num_sms, nvl_chunk_size, nvl_buffer_size, rdma_chunk_size, rdma_buffer_size)
tune_args = {'x': recv_x, 'handle': handle, 'config': config}
t, notify_t = bench_kineto(lambda: buffer.combine(**tune_args), ('combine', 'notify'))
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment