/************************************************************************* * Copyright (c) 2016-2019, NVIDIA CORPORATION. All rights reserved. * Modifications Copyright (c) 2019-2020 Advanced Micro Devices, Inc. All rights reserved. * * See LICENSE.txt for license information ************************************************************************/ #include "nccl.h" #include "channel.h" #include "nvmlwrap.h" #include "bootstrap.h" #include "transport.h" #include "group.h" #include "net.h" #include "coll_net.h" #include "enqueue.h" #include "graph.h" #include "argcheck.h" #include #include #include #include #include #include #include #include #include #include #ifdef ENABLE_TRACE std::chrono::high_resolution_clock::time_point ncclEpoch; #endif ncclNet_t* ncclNet = NULL; // Returns ncclInternalError if anything fails, causing that network to be ignored. ncclResult_t initNet(ncclNet_t* net) { int ndev; if (net->init(ncclDebugLog) != ncclSuccess) return ncclInternalError; if (net->devices(&ndev) != ncclSuccess) return ncclInternalError; if (ndev <= 0) return ncclSystemError; return ncclSuccess; } ncclResult_t initNet() { if (initNet(&ncclNetIb) == ncclSuccess) { ncclNet = &ncclNetIb; } return ncclSuccess; } ncclResult_t getHostName(char* hostname, int maxlen, const char delim) { if (gethostname(hostname, maxlen) != 0) { strncpy(hostname, "unknown", maxlen); return ncclSystemError; } int i = 0; while ((hostname[i] != delim) && (hostname[i] != '\0') && (i < maxlen-1)) i++; hostname[i] = '\0'; return ncclSuccess; } int parseStringList(const char* string, struct netIf* ifList, int maxList) { if (!string) return 0; const char* ptr = string; int ifNum = 0; int ifC = 0; char c; do { c = *ptr; if (c == ':') { if (ifC > 0) { ifList[ifNum].prefix[ifC] = '\0'; ifList[ifNum].port = atoi(ptr+1); ifNum++; ifC = 0; } while (c != ',' && c != '\0') c = *(++ptr); } else if (c == ',' || c == '\0') { if (ifC > 0) { ifList[ifNum].prefix[ifC] = '\0'; ifList[ifNum].port = -1; ifNum++; ifC = 0; } } else { ifList[ifNum].prefix[ifC] = c; ifC++; } ptr++; } while (ifNum < maxList && c); return ifNum; } static bool matchIf(const char* string, const char* ref, bool matchExact) { // Make sure to include '\0' in the exact case int matchLen = matchExact ? strlen(string) + 1 : strlen(ref); return strncmp(string, ref, matchLen) == 0; } static bool matchPort(const int port1, const int port2) { if (port1 == -1) return true; if (port2 == -1) return true; if (port1 == port2) return true; return false; } bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact) { // Make an exception for the case where no user list is defined if (listSize == 0) return true; for (int i=0; i