Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
DeepEP
Commits
eef7ab50
Unverified
Commit
eef7ab50
authored
Jul 07, 2025
by
Zhean Xu
Committed by
GitHub
Jul 07, 2025
Browse files
feat: support cluster size 2 (#283)
Co-authored-by:
Zhean Xu
<
xza@deepseek.com
>
parent
e6d61fc6
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
9 additions
and
5 deletions
+9
-5
csrc/kernels/launch.cuh
csrc/kernels/launch.cuh
+9
-5
No files found.
csrc/kernels/launch.cuh
View file @
eef7ab50
...
@@ -7,11 +7,15 @@
...
@@ -7,11 +7,15 @@
#ifndef DISABLE_SM90_FEATURES
#ifndef DISABLE_SM90_FEATURES
#define SETUP_LAUNCH_CONFIG(num_sms, num_threads, stream) \
#define SETUP_LAUNCH_CONFIG(num_sms, num_threads, stream) \
cudaLaunchConfig_t cfg = {(num_sms), (num_threads), 0, stream, nullptr, 0}; \
cudaLaunchConfig_t cfg = {(num_sms), (num_threads), 0, stream, nullptr, 0}; \
cudaLaunchAttribute attr[
1
]; \
cudaLaunchAttribute attr[
2
]; \
attr[0].id = cudaLaunchAttributeCooperative; \
attr[0].id = cudaLaunchAttributeCooperative; \
attr[0].val.cooperative = 1; \
attr[0].val.cooperative = 1; \
attr[1].id = cudaLaunchAttributeClusterDimension; \
attr[1].val.clusterDim.x = (num_sms % 2 == 0 ? 2 : 1); \
attr[1].val.clusterDim.y = 1; \
attr[1].val.clusterDim.z = 1; \
cfg.attrs = attr; \
cfg.attrs = attr; \
cfg.numAttrs =
1
cfg.numAttrs =
2
#else
#else
#define SETUP_LAUNCH_CONFIG(sms, threads, stream) \
#define SETUP_LAUNCH_CONFIG(sms, threads, stream) \
int __num_sms = (sms); \
int __num_sms = (sms); \
...
@@ -69,13 +73,13 @@ cfg.dynamicSmemBytes = smem_size;
...
@@ -69,13 +73,13 @@ cfg.dynamicSmemBytes = smem_size;
case 2: case_macro(dtype, 2); \
case 2: case_macro(dtype, 2); \
case 4: case_macro(dtype, 4); \
case 4: case_macro(dtype, 4); \
case 8: case_macro(dtype, 8); \
case 8: case_macro(dtype, 8); \
default: EP_HOST_ASSERT(false
&&
"Unsupported ranks"); \
default: EP_HOST_ASSERT(false
and
"Unsupported ranks"); \
} while (false)
} while (false)
#define SWITCH_TYPES(case_macro) \
#define SWITCH_TYPES(case_macro) \
switch (type) { \
switch (type) { \
case CUDA_R_16BF: case_macro(nv_bfloat16); \
case CUDA_R_16BF: case_macro(nv_bfloat16); \
default: EP_HOST_ASSERT(false
&&
"Unsupported type"); \
default: EP_HOST_ASSERT(false
and
"Unsupported type"); \
} while (false)
} while (false)
#define SWITCH_HIDDEN(case_macro) \
#define SWITCH_HIDDEN(case_macro) \
...
@@ -86,5 +90,5 @@ cfg.dynamicSmemBytes = smem_size;
...
@@ -86,5 +90,5 @@ cfg.dynamicSmemBytes = smem_size;
case 5120: case_macro(5120); \
case 5120: case_macro(5120); \
case 7168: case_macro(7168); \
case 7168: case_macro(7168); \
case 8192: case_macro(8192); \
case 8192: case_macro(8192); \
default: EP_HOST_ASSERT(false
&&
"Unsupported hidden"); \
default: EP_HOST_ASSERT(false
and
"Unsupported hidden"); \
} while (false)
} while (false)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment