Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
wh1225
rccl-auto-tuning
Commits
af903c20
Commit
af903c20
authored
Apr 02, 2026
by
wanghan
Browse files
Update
parent
08b50ba4
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
50 deletions
+0
-50
rccl-dtk-25.04/src/graph/tuning.cc
rccl-dtk-25.04/src/graph/tuning.cc
+0
-50
No files found.
rccl-dtk-25.04/src/graph/tuning.cc
View file @
af903c20
...
@@ -554,56 +554,6 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom
...
@@ -554,56 +554,6 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom
ncclResult_t
ncclTopoGetAlgoTime
(
struct
ncclInfo
*
info
,
int
algorithm
,
int
protocol
,
int
numPipeOps
,
float
*
time
)
{
ncclResult_t
ncclTopoGetAlgoTime
(
struct
ncclInfo
*
info
,
int
algorithm
,
int
protocol
,
int
numPipeOps
,
float
*
time
)
{
float
bw
=
info
->
comm
->
bandwidths
[
info
->
coll
][
algorithm
][
protocol
];
float
bw
=
info
->
comm
->
bandwidths
[
info
->
coll
][
algorithm
][
protocol
];
float
lat
=
info
->
comm
->
latencies
[
info
->
coll
][
algorithm
][
protocol
];
float
lat
=
info
->
comm
->
latencies
[
info
->
coll
][
algorithm
][
protocol
];
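// Automatically adjust the estimated execution time based on the performance data in qz.txt
// Supports tuning for multiple nRanks configurations
// Adjustment strategy:
// - diff≥8%: size_range = [original×0.5, original×2.0]
// - diff<8%: size_range = [original×0.75, original×1.5]
// Tuned configuration: nRanks == 8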
if
(
info
->
comm
->
nRanks
==
8
)
{
// ncclFuncAllReduce NCCL_ALGO_TREE NCCL_PROTO_LL: 8~1024 bytes (originally 16~512)
if
(
info
->
coll
==
ncclFuncAllReduce
&&
algorithm
==
NCCL_ALGO_TREE
&&
protocol
==
NCCL_PROTO_LL
&&
info
->
nBytes
>
8
&&
info
->
nBytes
<=
1024
)
{
*
time
=
0
;
return
ncclSuccess
;
}
// ncclFuncAllReduce NCCL_ALGO_TREE NCCL_PROTO_LL: 1536~196608 bytes (originally 2048~131072)
if
(
info
->
coll
==
ncclFuncAllReduce
&&
algorithm
==
NCCL_ALGO_TREE
&&
protocol
==
NCCL_PROTO_LL
&&
info
->
nBytes
>
1536
&&
info
->
nBytes
<=
196608
)
{
*
time
=
0
;
return
ncclSuccess
;
}
// ncclFuncAllReduce NCCL_ALGO_RING NCCL_PROTO_LL: 262144~4194304 bytes (originally 524288~2097152)
if
(
info
->
coll
==
ncclFuncAllReduce
&&
algorithm
==
NCCL_ALGO_RING
&&
protocol
==
NCCL_PROTO_LL
&&
info
->
nBytes
>
262144
&&
info
->
nBytes
<=
4194304
)
{
*
time
=
0
;
return
ncclSuccess
;
}
// ncclFuncAllReduce NCCL_ALGO_RING NCCL_PROTO_SIMPLE: 2097152~2147483648 bytes (originally 4194304~1073741824)
if
(
info
->
coll
==
ncclFuncAllReduce
&&
algorithm
==
NCCL_ALGO_RING
&&
protocol
==
NCCL_PROTO_SIMPLE
&&
info
->
nBytes
>
2097152
&&
info
->
nBytes
<=
2147483648
)
{
*
time
=
0
;
return
ncclSuccess
;
}
// ncclFuncBroadcast NCCL_ALGO_RING NCCL_PROTO_SIMPLE: 6144~262144 bytes (originally 8192~131072)
if
(
info
->
coll
==
ncclFuncBroadcast
&&
algorithm
==
NCCL_ALGO_RING
&&
protocol
==
NCCL_PROTO_SIMPLE
&&
info
->
nBytes
>
6144
&&
info
->
nBytes
<=
262144
)
{
*
time
=
0
;
return
ncclSuccess
;
}
// ncclFuncReduce NCCL_ALGO_RING NCCL_PROTO_SIMPLE: 6144~262144 bytes (originally 8192~131072)
if
(
info
->
coll
==
ncclFuncReduce
&&
algorithm
==
NCCL_ALGO_RING
&&
protocol
==
NCCL_PROTO_SIMPLE
&&
info
->
nBytes
>
6144
&&
info
->
nBytes
<=
262144
)
{
*
time
=
0
;
return
ncclSuccess
;
}
if
(
info
->
coll
==
ncclFuncAllGather
&&
algorithm
==
NCCL_ALGO_RING
&&
protocol
==
NCCL_PROTO_LL
&&
info
->
nBytes
>
524288
&&
info
->
nBytes
<=
2097152
)
{
*
time
=
0
;
return
ncclSuccess
;
}
if
(
info
->
coll
==
ncclFuncReduceScatter
&&
algorithm
==
NCCL_ALGO_RING
&&
protocol
==
NCCL_PROTO_LL
&&
info
->
nBytes
>
524288
&&
info
->
nBytes
<=
2097152
)
{
*
time
=
0
;
return
ncclSuccess
;
}
}
if
(
bw
==
0
)
{
if
(
bw
==
0
)
{
*
time
=
-
1.0
;
return
ncclSuccess
;
*
time
=
-
1.0
;
return
ncclSuccess
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment