/*************************************************************************
 * Copyright (c) 2016-2020, NVIDIA CORPORATION. All rights reserved.
 *
 * See LICENSE.txt for license information
 ************************************************************************/

#include "utils.h"
// #include "core.h"
// #include "nvmlwrap.h"

// NOTE(review): three system #include directives stood here, but their header
// names were missing from the reviewed text. The three below are the standard
// headers the disabled code relies on (malloc/strtol/getenv, std::ifstream,
// std::string) -- TODO confirm against the original file.
#include <stdlib.h>
#include <fstream>
#include <string>

namespace sccl {

// NOTE(review): every definition in this namespace is commented out, so this
// translation unit currently defines no symbols. The disabled code is kept
// below, one statement per line; lines prefixed with "// //" are the in-code
// comments of the disabled code. The reinterpret_cast target types
// (uintptr_t, void*, Unhunk*) were missing from the reviewed text and are
// restored from the surrounding declarations -- TODO confirm before
// re-enabling anything.

// // Get current Compute Capability
// int scclCudaCompCap() {
//     int hipDev;
//     if(cudaGetDevice(&hipDev) != cudaSuccess)
//         return 0;
//     int ccMajor, ccMinor;
//     if(cudaDeviceGetAttribute(&ccMajor, cudaDevAttrComputeCapabilityMajor, hipDev) != cudaSuccess)
//         return 0;
//     if(cudaDeviceGetAttribute(&ccMinor, cudaDevAttrComputeCapabilityMinor, hipDev) != cudaSuccess)
//         return 0;
//     return ccMajor * 10 + ccMinor;
// }

// scclResult_t int64ToBusId(int64_t id, char* busId) {
//     sprintf(busId, "%04lx:%02lx:%02lx.%01lx", (id) >> 20, (id & 0xff000) >> 12, (id & 0xff0) >> 4, (id & 0xf));
//     return scclSuccess;
// }

// scclResult_t busIdToInt64(const char* busId, int64_t* id) {
//     char hexStr[17]; // Longest possible int64 hex string + null terminator.
//     int hexOffset = 0;
//     for(int i = 0; hexOffset < sizeof(hexStr) - 1; i++) {
//         char c = busId[i];
//         if(c == '.' || c == ':')
//             continue;
//         if((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')) {
//             hexStr[hexOffset++] = busId[i];
//         } else
//             break;
//     }
//     hexStr[hexOffset] = '\0';
//     *id = strtol(hexStr, NULL, 16);
//     return scclSuccess;
// }

// // Convert a logical hipDev index to the NVML device minor number
// scclResult_t getBusId(int hipDev, int64_t* busId) {
//     // On most systems, the PCI bus ID comes back as in the 0000:00:00.0
//     // format. Still need to allocate proper space in case PCI domain goes
//     // higher.
//     char busIdStr[] = "00000000:00:00.0";
//     CUDACHECK(cudaDeviceGetPCIBusId(busIdStr, sizeof(busIdStr), hipDev));
//     NCCLCHECK(busIdToInt64(busIdStr, busId));
//     return scclSuccess;
// }

// scclResult_t getHostName(char* hostname, int maxlen, const char delim) {
//     if(gethostname(hostname, maxlen) != 0) {
//         strncpy(hostname, "unknown", maxlen);
//         return scclSystemError;
//     }
//     int i = 0;
//     while((hostname[i] != delim) && (hostname[i] != '\0') && (i < maxlen - 1))
//         i++;
//     hostname[i] = '\0';
//     return scclSuccess;
// }

// uint64_t getHash(const char* string, int n) {
//     // Based on DJB2a, result = result * 33 ^ char
//     uint64_t result = 5381;
//     for(int c = 0; c < n; c++) {
//         result = ((result << 5) + result) ^ string[c];
//     }
//     return result;
// }

// /* Generate a hash of the unique identifying string for this host
//  * that will be unique for both bare-metal and container instances
//  * Equivalent of a hash of;
//  *
//  * $(hostname)$(cat /proc/sys/kernel/random/boot_id)
//  *
//  * This string can be overridden by using the NCCL_HOSTID env var.
//  */
// #define HOSTID_FILE "/proc/sys/kernel/random/boot_id"
// uint64_t getHostHash(void) {
//     char hostHash[1024];
//     char* hostId;
//     // Fall back is the full hostname if something fails
//     (void)getHostName(hostHash, sizeof(hostHash), '\0');
//     int offset = strlen(hostHash);
//     if((hostId = getenv("NCCL_HOSTID")) != NULL) {
//         INFO(NCCL_ENV, "NCCL_HOSTID set by environment to %s", hostId);
//         strncpy(hostHash, hostId, sizeof(hostHash));
//     } else {
//         FILE* file = fopen(HOSTID_FILE, "r");
//         if(file != NULL) {
//             char* p;
//             if(fscanf(file, "%ms", &p) == 1) {
//                 strncpy(hostHash + offset, p, sizeof(hostHash) - offset - 1);
//                 free(p);
//             }
//         }
//         // NOTE(review): this fclose() sits outside the NULL check above, so it
//         // is also reached with file == NULL when fopen() fails; move it inside
//         // the if-block before re-enabling.
//         fclose(file);
//     }
//     // Make sure the string is terminated
//     hostHash[sizeof(hostHash) - 1] = '\0';
//     TRACE(NCCL_INIT, "unique hostname '%s'", hostHash);
//     return getHash(hostHash, strlen(hostHash));
// }

// /* Generate a hash of the unique identifying string for this process
//  * that will be unique for both bare-metal and container instances
//  * Equivalent of a hash of;
//  *
//  * $$ $(readlink /proc/self/ns/pid)
//  */
// uint64_t getPidHash(void) {
//     char pname[1024];
//     // Start off with our pid ($$)
//     sprintf(pname, "%ld", (long)getpid());
//     int plen = strlen(pname);
//     int len = readlink("/proc/self/ns/pid", pname + plen, sizeof(pname) - 1 - plen);
//     if(len < 0)
//         len = 0;
//     pname[plen + len] = '\0';
//     TRACE(NCCL_INIT, "unique PID '%s'", pname);
//     return getHash(pname, strlen(pname));
// }

// int parseStringList(const char* string, struct netIf* ifList, int maxList) {
//     if(!string)
//         return 0;
//     const char* ptr = string;
//     int ifNum = 0;
//     int ifC = 0;
//     char c;
//     do {
//         c = *ptr;
//         if(c == ':') {
//             if(ifC > 0) {
//                 ifList[ifNum].prefix[ifC] = '\0';
//                 ifList[ifNum].port = atoi(ptr + 1);
//                 ifNum++;
//                 ifC = 0;
//             }
//             while(c != ',' && c != '\0')
//                 c = *(++ptr);
//         } else if(c == ',' || c == '\0') {
//             if(ifC > 0) {
//                 ifList[ifNum].prefix[ifC] = '\0';
//                 ifList[ifNum].port = -1;
//                 ifNum++;
//                 ifC = 0;
//             }
//         } else {
//             // NOTE(review): ifC is never compared against the capacity of
//             // `prefix` (declared outside this file) -- verify the bound
//             // before re-enabling.
//             ifList[ifNum].prefix[ifC] = c;
//             ifC++;
//         }
//         ptr++;
//     } while(ifNum < maxList && c);
//     return ifNum;
// }

// static bool matchIf(const char* string, const char* ref, bool matchExact) {
//     // Make sure to include '\0' in the exact case
//     int matchLen = matchExact ? strlen(string) + 1 : strlen(ref);
//     return strncmp(string, ref, matchLen) == 0;
// }

// static bool matchPort(const int port1, const int port2) {
//     if(port1 == -1)
//         return true;
//     if(port2 == -1)
//         return true;
//     if(port1 == port2)
//         return true;
//     return false;
// }

// bool matchIfList(const char* string, int port, struct netIf* ifList, int listSize, bool matchExact) {
//     // Make an exception for the case where no user list is defined
//     if(listSize == 0)
//         return true;
//     for(int i = 0; i < listSize; i++) {
//         if(matchIf(string, ifList[i].prefix, matchExact) && matchPort(port, ifList[i].port)) {
//             return true;
//         }
//     }
//     return false;
// }

// __thread struct scclThreadSignal scclThreadSignalLocalInstance = scclThreadSignalStaticInitializer();

// void* scclMemoryStack::allocateSpilled(struct scclMemoryStack* me, size_t size, size_t align) {
//     // `me->hunks` points to the top of the stack non-empty hunks. Hunks above
//     // this (reachable via `->above`) are empty.
//     struct Hunk* top = me->topFrame.hunk;
//     size_t mallocSize = 0;
//     // If we have lots of space left in hunk but that wasn't enough then we'll
//     // allocate the object unhunked.
//     if(me->topFrame.end - me->topFrame.bumper >= 8 << 10)
//         goto unhunked;
//     // If we have another hunk (which must be empty) waiting above this one and
//     // the object fits then use that.
//     if(top && top->above) {
//         struct Hunk* top1 = top->above;
//         uintptr_t uobj = (reinterpret_cast<uintptr_t>(top1) + sizeof(struct Hunk) + align - 1) & -uintptr_t(align);
//         if(uobj + size <= reinterpret_cast<uintptr_t>(top1) + top1->size) {
//             me->topFrame.hunk = top1;
//             me->topFrame.bumper = uobj + size;
//             me->topFrame.end = reinterpret_cast<uintptr_t>(top1) + top1->size;
//             return reinterpret_cast<void*>(uobj);
//         }
//     }
//     { // If the next hunk we're going to allocate wouldn't be big enough but the
//         // Unhunk proxy fits in the current hunk then go allocate as unhunked.
//         size_t nextSize = (top ? top->size : 0) + (64 << 10);
//         constexpr size_t maxAlign = 64;
//         if(nextSize < sizeof(struct Hunk) + maxAlign + size) {
//             uintptr_t uproxy = (me->topFrame.bumper + alignof(Unhunk) - 1) & -uintptr_t(alignof(Unhunk));
//             if(uproxy + sizeof(struct Unhunk) <= me->topFrame.end)
//                 goto unhunked;
//         }
//         // At this point we must need another hunk, either to fit the object
//         // itself or its Unhunk proxy.
//         mallocSize = nextSize;
//         INFO(NCCL_ALLOC, "%s:%d memory stack hunk malloc(%llu)", __FILE__, __LINE__, (unsigned long long)mallocSize);
//         struct Hunk* top1 = (struct Hunk*)malloc(mallocSize);
//         if(top1 == nullptr)
//             goto malloc_exhausted;
//         top1->size = nextSize;
//         top1->above = nullptr;
//         if(top)
//             top->above = top1;
//         top = top1;
//         me->topFrame.hunk = top;
//         me->topFrame.end = reinterpret_cast<uintptr_t>(top) + nextSize;
//         me->topFrame.bumper = reinterpret_cast<uintptr_t>(top) + sizeof(struct Hunk);
//     }
//     { // Try to fit object in the new top hunk.
//         uintptr_t uobj = (me->topFrame.bumper + align - 1) & -uintptr_t(align);
//         if(uobj + size <= me->topFrame.end) {
//             me->topFrame.bumper = uobj + size;
//             return reinterpret_cast<void*>(uobj);
//         }
//     }
// unhunked: { // We need to allocate the object out-of-band and put an Unhunk proxy in-band
//     // to keep track of it.
//     uintptr_t uproxy = (me->topFrame.bumper + alignof(Unhunk) - 1) & -uintptr_t(alignof(Unhunk));
//     Unhunk* proxy = reinterpret_cast<Unhunk*>(uproxy);
//     me->topFrame.bumper = uproxy + sizeof(Unhunk);
//     proxy->next = me->topFrame.unhunks;
//     me->topFrame.unhunks = proxy;
//     mallocSize = size;
//     proxy->obj = malloc(mallocSize);
//     INFO(NCCL_ALLOC, "%s:%d memory stack non-hunk malloc(%llu)", __FILE__, __LINE__, (unsigned long long)mallocSize);
//     if(proxy->obj == nullptr)
//         goto malloc_exhausted;
//     return proxy->obj;
// }
// malloc_exhausted:
//     WARN("%s:%d Unrecoverable error detected: malloc(size=%llu) returned null.", __FILE__, __LINE__, (unsigned long long)mallocSize);
//     abort();
// }

// void scclMemoryStackDestruct(struct scclMemoryStack* me) {
//     // Free unhunks first because both the frames and unhunk proxies lie within the hunks.
//     struct scclMemoryStack::Frame* f = &me->topFrame;
//     while(f != nullptr) {
//         struct scclMemoryStack::Unhunk* u = f->unhunks;
//         while(u != nullptr) {
//             free(u->obj);
//             u = u->next;
//         }
//         f = f->below;
//     }
//     // Free hunks
//     struct scclMemoryStack::Hunk* h = me->stub.above;
//     while(h != nullptr) {
//         struct scclMemoryStack::Hunk* h1 = h->above;
//         free(h);
//         h = h1;
//     }
// }

// typedef struct {
//     pid_t pid;
//     pid_t ppid;
//     char pcmdLine[4096];
//     char cmdLine[4096];
// } appConfigOptimizeArg_t;

// static bool barrier_Flag;
// int maxGPUs = -1;

// int initInfo() {
//     /* get barrier_Flag */
//     uint32_t index = 0;
//     appConfigOptimizeArg_t args = {0};
//     args.pid = getpid();
//     args.ppid = getppid();
//     std::string cmdLinePath = "/proc/" + std::to_string(args.ppid) + "/cmdline";
//     std::ifstream cmdLineFile;
//     cmdLineFile.open(cmdLinePath.c_str());
//     cmdLineFile.read(args.pcmdLine, sizeof(args.pcmdLine));
//     cmdLineFile.close();
//     cmdLinePath = "/proc/" + std::to_string(args.pid) + "/cmdline";
//     cmdLineFile.open(cmdLinePath.c_str());
//     cmdLineFile.read(args.cmdLine, sizeof(args.cmdLine));
//     cmdLineFile.close();
//     if(memmem(args.cmdLine, sizeof(args.cmdLine), "sccl_context_test", strlen("sccl_context_test")) ||
//        memmem(args.pcmdLine, sizeof(args.pcmdLine), "sccl_context_test", strlen("sccl_context_test"))) {
//         barrier_Flag = true;
//     } else {
//         barrier_Flag = false;
//     }
//     INFO(NCCL_INIT, "Init config for sccl_context_test: %d", barrier_Flag);
//     /* get maximum number of GPUs in all NUMA nodes */
//     if(maxGPUs == -1) {
//         int gpuCount[32] = {0}; // Assume MAX_NUMA_NODES=32
//         int deviceCount;
//         // NOTE(review): the hipGetDeviceCount() result is ignored, so
//         // deviceCount is indeterminate if the call fails. The loop below also
//         // probes card1..cardN and never card0 -- confirm that is intended.
//         hipGetDeviceCount(&deviceCount);
//         // Get numbers of GPUs in all NUMA nodes in system
//         for(int i = 1; i <= deviceCount; ++i) {
//             char path[256];
//             snprintf(path, sizeof(path), "/sys/class/drm/card%d/device/numa_node", i);
//             FILE* fp = fopen(path, "r");
//             if(fp == NULL) {
//                 perror("Error opening NUMA node file");
//                 continue;
//             }
//             int numaNode;
//             if(fscanf(fp, "%d", &numaNode) == 1 && numaNode >= 0 && numaNode < 32) {
//                 gpuCount[numaNode]++;
//             }
//             fclose(fp);
//         }
//         // Find maximum number of GPUs in all NUMA nodes
//         for(int i = 0; i < 32; ++i) {
//             if(gpuCount[i] > maxGPUs) {
//                 maxGPUs = gpuCount[i];
//             }
//         }
//         INFO(NCCL_INIT, "Maximum number of GPUs in any NUMA node: %d\n", maxGPUs);
//     }
//     return 0;
// }

// bool getBarrierFlag() { return barrier_Flag; }

// int getNumaMaxGpus() { return maxGPUs; }

} // namespace sccl