/*************************************************************************
 * Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
 * Modifications Copyright (c) 2020-2022 Advanced Micro Devices, Inc. All rights reserved.
 *
 * See LICENSE.txt for license information
 ************************************************************************/

#include <hip/hip_runtime.h>
#include "common.h"

void SendRecvGetCollByteCount(size_t *sendcount, size_t *recvcount, size_t *paramcount, size_t *sendInplaceOffset, size_t *recvInplaceOffset, size_t count, int nranks) {
  // Each rank sends count elements to one peer and receives count elements
  // from another; sendrecv has no meaningful in-place variant, so both
  // in-place offsets are zero.
  *sendcount = count;
  *recvcount = count;
  *sendInplaceOffset = 0;
  *recvInplaceOffset = 0;
  *paramcount = *sendcount;
}

testResult_t SendRecvInitData(struct threadArgs* args, ncclDataType_t type, ncclRedOp_t op, int root, int rep, int in_place) {
  size_t sendcount = args->sendBytes / wordSize(type);
  size_t recvcount = args->expectedBytes / wordSize(type);
  int nranks = args->nProcs*args->nThreads*args->nGpus*args->nRanks;

  int k = 0;
  for (int i=0; i<args->nGpus; i++) {
    HIPCHECK(hipSetDevice(args->gpus[i]));
    for (int l=0; l<args->nRanks; l++) {
      int rank = ((args->proc*args->nThreads + args->thread)*args->nGpus*args->nRanks + i*args->nRanks + l);
      HIPCHECK(hipMemset(args->recvbuffs[k], 0, args->expectedBytes));
      void* data = in_place ? args->recvbuffs[k] : args->sendbuffs[k];
      TESTCHECK(InitData(data, sendcount, rank*sendcount, type, ncclSum, rep, 1, 0));
      // Each rank receives from its ring predecessor, so the expected buffer
      // holds the data that peer (rank-1) was initialized to send.
      int peer = (rank-1+nranks)%nranks;
      TESTCHECK(InitData(args->expected[k], recvcount, peer*recvcount, type, ncclSum, rep, 1, 0));
      k++;
    }
    HIPCHECK(hipDeviceSynchronize());
  }
  // We don't support in-place sendrecv
  args->reportErrors = in_place ? 0 : 1;
  return testSuccess;
}

void SendRecvGetBw(size_t count, int typesize, double sec, double* algBw, double* busBw, int nranks) {
  // Algorithm bandwidth in GB/s; bus bandwidth uses factor 1 because every
  // byte crosses the interconnect exactly once.
  double baseBw = (double)(count * typesize) / 1.0E9 / sec;

  *algBw = baseBw;
  double factor = 1;
  *busBw = baseBw * factor;
}

testResult_t SendRecvRunColl(void* sendbuff, void* recvbuff, size_t count, ncclDataType_t type, ncclRedOp_t op, int root, ncclComm_t comm, hipStream_t stream) {
  int nRanks;
  NCCLCHECK(ncclCommCount(comm, &nRanks));
  int rank;
  NCCLCHECK(ncclCommUserRank(comm, &rank));
  // Ring pattern: receive from rank-1, send to rank+1. Grouping the send and
  // receive lets them be posted together instead of serializing.
  int recvPeer = (rank-1+nRanks) % nRanks;
  int sendPeer = (rank+1) % nRanks;
  NCCLCHECK(ncclGroupStart());
  NCCLCHECK(ncclSend(sendbuff, count, type, sendPeer, comm, stream));
  NCCLCHECK(ncclRecv(recvbuff, count, type, recvPeer, comm, stream));
  NCCLCHECK(ncclGroupEnd());
  return testSuccess;
}

struct testColl sendRecvTest = {
  "SendRecv",
  SendRecvGetCollByteCount,
  SendRecvInitData,
  SendRecvGetBw,
  SendRecvRunColl
};

void SendRecvGetBuffSize(size_t *sendcount, size_t *recvcount, size_t count, int nranks) {
  size_t paramcount, sendInplaceOffset, recvInplaceOffset;
  SendRecvGetCollByteCount(sendcount, recvcount, &paramcount, &sendInplaceOffset, &recvInplaceOffset, count, nranks);
}

testResult_t SendRecvRunTest(struct threadArgs* args, int root, ncclDataType_t type, const char* typeName, ncclRedOp_t op, const char* opName) {
  args->collTest = &sendRecvTest;
  ncclDataType_t *run_types;
  ncclRedOp_t *run_ops;
  const char **run_typenames, **run_opnames;
  int type_count, op_count;

  if ((int)type != -1) {
    type_count = 1;
    run_types = &type;
    run_typenames = &typeName;
  } else {
    type_count = test_typenum;
    run_types = test_types;
    run_typenames = test_typenames;
  }

  if ((int)op != -1) {
    op_count = 1;
    run_ops = &op;
    run_opnames = &opName;
  } else {
    op_count = test_opnum;
    run_ops = test_ops;
    run_opnames = test_opnames;
  }

  for (int i=0; i<type_count; i++) {
    for (int j=0; j<op_count; j++) {
      TESTCHECK(TimeTest(args, run_types[i], run_typenames[i], run_ops[j], run_opnames[j], -1));
    }
  }
  return testSuccess;
}

struct testEngine sendRecvEngine = {
  SendRecvGetBuffSize,
  SendRecvRunTest
};

#pragma weak ncclTestEngine=sendRecvEngine
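
/*
 * Usage sketch (an assumption based on the standard nccl-tests/rccl-tests
 * harness, not verified against this checkout): the weak ncclTestEngine
 * symbol above makes the harness dispatch to sendRecvEngine, and the
 * resulting binary is typically invoked as
 *
 *   ./sendrecv_perf -b 8 -e 128M -f 2 -g <ngpus>
 *
 * where -b/-e set the minimum/maximum message size, -f the size step factor,
 * and -g the number of GPUs per thread.
 */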