Commit c939644c (unverified), authored by Shangyan Zhou, committed by GitHub
Browse files

Support EP48/96 for internode kernels and update some config. (#421)

parent abba6add
...@@ -63,7 +63,9 @@ cfg.dynamicSmemBytes = smem_size; ...@@ -63,7 +63,9 @@ cfg.dynamicSmemBytes = smem_size;
switch (num_ranks / NUM_MAX_NVL_PEERS) { \ switch (num_ranks / NUM_MAX_NVL_PEERS) { \
case 2: case_macro(2); \ case 2: case_macro(2); \
case 4: case_macro(4); \ case 4: case_macro(4); \
case 6: case_macro(6); \
case 8: case_macro(8); \ case 8: case_macro(8); \
case 12: case_macro(12); \
case 16: case_macro(16); \ case 16: case_macro(16); \
case 18: case_macro(18); \ case 18: case_macro(18); \
case 20: case_macro(20); \ case 20: case_macro(20); \
......
...@@ -235,9 +235,11 @@ class Buffer: ...@@ -235,9 +235,11 @@ class Buffer:
8: Config(Buffer.num_sms, 6, 256, 6, 128), 8: Config(Buffer.num_sms, 6, 256, 6, 128),
16: Config(Buffer.num_sms, 36, 288, 20, 128), 16: Config(Buffer.num_sms, 36, 288, 20, 128),
24: Config(Buffer.num_sms, 8, 288, 32, 128), 24: Config(Buffer.num_sms, 8, 288, 32, 128),
32: Config(Buffer.num_sms, 32, 288, 32, 128), 32: Config(Buffer.num_sms, 32, 288, 8, 128),
64: Config(Buffer.num_sms, 20, 288, 28, 128), 48: Config(Buffer.num_sms, 32, 288, 8, 128),
128: Config(Buffer.num_sms, 20, 560, 32, 128), 64: Config(Buffer.num_sms, 32, 288, 8, 128),
96: Config(Buffer.num_sms, 20, 480, 12, 128),
128: Config(Buffer.num_sms, 20, 560, 12, 128),
144: Config(Buffer.num_sms, 32, 720, 12, 128), 144: Config(Buffer.num_sms, 32, 720, 12, 128),
160: Config(Buffer.num_sms, 28, 720, 12, 128), 160: Config(Buffer.num_sms, 28, 720, 12, 128),
} }
...@@ -264,8 +266,10 @@ class Buffer: ...@@ -264,8 +266,10 @@ class Buffer:
16: Config(Buffer.num_sms, 4, 288, 12, 128), 16: Config(Buffer.num_sms, 4, 288, 12, 128),
24: Config(Buffer.num_sms, 1, 288, 8, 128), 24: Config(Buffer.num_sms, 1, 288, 8, 128),
32: Config(Buffer.num_sms, 1, 288, 8, 128), 32: Config(Buffer.num_sms, 1, 288, 8, 128),
64: Config(Buffer.num_sms, 1, 288, 20, 128), 48: Config(Buffer.num_sms, 1, 288, 8, 128),
128: Config(Buffer.num_sms, 1, 560, 12, 128), 64: Config(Buffer.num_sms, 1, 288, 8, 128),
96: Config(Buffer.num_sms, 1, 480, 8, 128),
128: Config(Buffer.num_sms, 1, 560, 8, 128),
144: Config(Buffer.num_sms, 2, 720, 8, 128), 144: Config(Buffer.num_sms, 2, 720, 8, 128),
160: Config(Buffer.num_sms, 2, 720, 8, 128), 160: Config(Buffer.num_sms, 2, 720, 8, 128),
} }
......
...@@ -90,7 +90,7 @@ def test_main(args: argparse.Namespace, num_sms: int, ...@@ -90,7 +90,7 @@ def test_main(args: argparse.Namespace, num_sms: int,
time.sleep(1) time.sleep(1)
# Config # Config
rdma_buffer_size, nvl_buffer_size = 128, (720 if num_ranks in (144, 160) else 512) rdma_buffer_size, nvl_buffer_size = 128, (720 if num_ranks in (48, 96, 144, 160) else 512)
config = deep_ep.Config(num_sms, 8, nvl_buffer_size, 16, rdma_buffer_size) config = deep_ep.Config(num_sms, 8, nvl_buffer_size, 16, rdma_buffer_size)
# Test dispatch # Test dispatch
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment