Commit af903c20 authored by wanghan
Browse files

Update

parent 08b50ba4
......@@ -554,56 +554,6 @@ ncclResult_t ncclTopoTuneModel(struct ncclComm* comm, int minCompCap, int maxCom
ncclResult_t ncclTopoGetAlgoTime(struct ncclInfo* info, int algorithm, int protocol, int numPipeOps, float* time) {
float bw = info->comm->bandwidths[info->coll][algorithm][protocol];
float lat = info->comm->latencies[info->coll][algorithm][protocol];
// 根据 qz.txt 性能数据自动调整执行时间
// 支持多个 nRanks 配置的优化
// 调整策略:
// - diff≥8%: size_range = [original×0.5, original×2.0]
// - diff<8%: size_range = [original×0.75, original×1.5]
// 优化配置: nRanks == 8
if (info->comm->nRanks == 8) {
// ncclFuncAllReduce NCCL_ALGO_TREE NCCL_PROTO_LL: 8~1024 bytes (原始 16~512)
if (info->coll == ncclFuncAllReduce && algorithm == NCCL_ALGO_TREE && protocol == NCCL_PROTO_LL && info->nBytes > 8 && info->nBytes <= 1024) {
*time = 0;
return ncclSuccess;
}
// ncclFuncAllReduce NCCL_ALGO_TREE NCCL_PROTO_LL: 1536~196608 bytes (原始 2048~131072)
if (info->coll == ncclFuncAllReduce && algorithm == NCCL_ALGO_TREE && protocol == NCCL_PROTO_LL && info->nBytes > 1536 && info->nBytes <= 196608) {
*time = 0;
return ncclSuccess;
}
// ncclFuncAllReduce NCCL_ALGO_RING NCCL_PROTO_LL: 262144~4194304 bytes (原始 524288~2097152)
if (info->coll == ncclFuncAllReduce && algorithm == NCCL_ALGO_RING && protocol == NCCL_PROTO_LL && info->nBytes > 262144 && info->nBytes <= 4194304) {
*time = 0;
return ncclSuccess;
}
// ncclFuncAllReduce NCCL_ALGO_RING NCCL_PROTO_SIMPLE: 2097152~2147483648 bytes (原始 4194304~1073741824)
if (info->coll == ncclFuncAllReduce && algorithm == NCCL_ALGO_RING && protocol == NCCL_PROTO_SIMPLE && info->nBytes > 2097152 && info->nBytes <= 2147483648) {
*time = 0;
return ncclSuccess;
}
// ncclFuncBroadcast NCCL_ALGO_RING NCCL_PROTO_SIMPLE: 6144~262144 bytes (原始 8192~131072)
if (info->coll == ncclFuncBroadcast && algorithm == NCCL_ALGO_RING && protocol == NCCL_PROTO_SIMPLE && info->nBytes > 6144 && info->nBytes <= 262144) {
*time = 0;
return ncclSuccess;
}
// ncclFuncReduce NCCL_ALGO_RING NCCL_PROTO_SIMPLE: 6144~262144 bytes (原始 8192~131072)
if (info->coll == ncclFuncReduce && algorithm == NCCL_ALGO_RING && protocol == NCCL_PROTO_SIMPLE && info->nBytes > 6144 && info->nBytes <= 262144) {
*time = 0;
return ncclSuccess;
}
if (info->coll == ncclFuncAllGather && algorithm == NCCL_ALGO_RING && protocol == NCCL_PROTO_LL && info->nBytes > 524288 && info->nBytes <= 2097152) {
*time = 0;
return ncclSuccess;
}
if (info->coll == ncclFuncReduceScatter && algorithm == NCCL_ALGO_RING && protocol == NCCL_PROTO_LL && info->nBytes > 524288 && info->nBytes <= 2097152) {
*time = 0;
return ncclSuccess;
}
}
if (bw == 0) {
*time = -1.0; return ncclSuccess;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment