/*************************************************************************
 * Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
 *
 * See LICENSE.txt for license information
 ************************************************************************/

#include "nvmlwrap.h"
#include "base.h"

#include <initializer_list>
#include <memory>
#include <mutex>

namespace sccl {
namespace hardware {
namespace topology {

int scclNvmlDeviceCount = 0;
scclNvmlDeviceInfo scclNvmlDevices[scclNvmlMaxDevices];
scclNvmlDevicePairInfo scclNvmlDevicePairs[scclNvmlMaxDevices][scclNvmlMaxDevices];

#if SCCL_NVML_DIRECT
#define SCCL_NVML_FN(name, rettype, arglist) constexpr rettype(*pfn_##name) arglist = name;
#else
#include <dlfcn.h>
#define SCCL_NVML_FN(name, rettype, arglist) rettype(*pfn_##name) arglist = nullptr;
#endif

namespace {
SCCL_NVML_FN(nvmlInit, nvmlReturn_t, ())
SCCL_NVML_FN(nvmlInit_v2, nvmlReturn_t, ())
SCCL_NVML_FN(nvmlShutdown, nvmlReturn_t, ())
SCCL_NVML_FN(nvmlDeviceGetCount, nvmlReturn_t, (unsigned int*))
SCCL_NVML_FN(nvmlDeviceGetCount_v2, nvmlReturn_t, (unsigned int*))
SCCL_NVML_FN(nvmlDeviceGetHandleByPciBusId, nvmlReturn_t, (const char* pciBusId, nvmlDevice_t* device))
SCCL_NVML_FN(nvmlDeviceGetHandleByIndex, nvmlReturn_t, (unsigned int index, nvmlDevice_t* device))
SCCL_NVML_FN(nvmlDeviceGetIndex, nvmlReturn_t, (nvmlDevice_t device, unsigned* index))
SCCL_NVML_FN(nvmlErrorString, char const*, (nvmlReturn_t r))
SCCL_NVML_FN(nvmlDeviceGetNvLinkState, nvmlReturn_t, (nvmlDevice_t device, unsigned int link, nvmlEnableState_t* isActive))
SCCL_NVML_FN(nvmlDeviceGetNvLinkRemotePciInfo, nvmlReturn_t, (nvmlDevice_t device, unsigned int link, nvmlPciInfo_t* pci))
SCCL_NVML_FN(nvmlDeviceGetNvLinkCapability, nvmlReturn_t,
             (nvmlDevice_t device, unsigned int link, nvmlNvLinkCapability_t capability, unsigned int* capResult))
SCCL_NVML_FN(nvmlDeviceGetCudaComputeCapability, nvmlReturn_t, (nvmlDevice_t device, int* major, int* minor))
SCCL_NVML_FN(nvmlDeviceGetP2PStatus, nvmlReturn_t,
             (nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuP2PCapsIndex_t p2pIndex, nvmlGpuP2PStatus_t* p2pStatus))
SCCL_NVML_FN(nvmlDeviceGetFieldValues, nvmlReturn_t, (nvmlDevice_t device, int valuesCount, nvmlFieldValue_t* values))

std::mutex lock; // NVML has had some thread safety bugs
bool initialized = false;
thread_local bool threadInitialized = false;
scclResult_t initResult;
} // namespace

scclResult_t scclNvmlEnsureInitialized() {
  // Optimization to avoid repeatedly grabbing the lock when we only want to
  // read from the global tables.
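  // Each thread checks its own thread_local flag first, so only a thread's first
  // call takes the mutex, and only the first call in the process actually
  // initializes NVML and populates the device and pair tables below.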
  if(threadInitialized) return initResult;
  threadInitialized = true;

  std::lock_guard<std::mutex> locked(lock);
  if(initialized) return initResult;
  initialized = true;

#if !SCCL_NVML_DIRECT
  if(pfn_nvmlInit == nullptr) {
    void* libhandle = dlopen("libnvidia-ml.so.1", RTLD_NOW);
    if(libhandle == nullptr) {
      WARN("Failed to open libnvidia-ml.so.1");
      initResult = scclSystemError;
      return initResult;
    }

    struct Symbol {
      void** ppfn;
      char const* name;
    };
    std::initializer_list<Symbol> symbols = {
      {(void**)&pfn_nvmlInit, "nvmlInit"},
      {(void**)&pfn_nvmlInit_v2, "nvmlInit_v2"},
      {(void**)&pfn_nvmlShutdown, "nvmlShutdown"},
      {(void**)&pfn_nvmlDeviceGetCount, "nvmlDeviceGetCount"},
      {(void**)&pfn_nvmlDeviceGetCount_v2, "nvmlDeviceGetCount_v2"},
      {(void**)&pfn_nvmlDeviceGetHandleByPciBusId, "nvmlDeviceGetHandleByPciBusId"},
      {(void**)&pfn_nvmlDeviceGetHandleByIndex, "nvmlDeviceGetHandleByIndex"},
      {(void**)&pfn_nvmlDeviceGetIndex, "nvmlDeviceGetIndex"},
      {(void**)&pfn_nvmlErrorString, "nvmlErrorString"},
      {(void**)&pfn_nvmlDeviceGetNvLinkState, "nvmlDeviceGetNvLinkState"},
      {(void**)&pfn_nvmlDeviceGetNvLinkRemotePciInfo, "nvmlDeviceGetNvLinkRemotePciInfo"},
      {(void**)&pfn_nvmlDeviceGetNvLinkCapability, "nvmlDeviceGetNvLinkCapability"},
      {(void**)&pfn_nvmlDeviceGetCudaComputeCapability, "nvmlDeviceGetCudaComputeCapability"},
      {(void**)&pfn_nvmlDeviceGetP2PStatus, "nvmlDeviceGetP2PStatus"},
      {(void**)&pfn_nvmlDeviceGetFieldValues, "nvmlDeviceGetFieldValues"}};
    for(Symbol sym : symbols) {
      *sym.ppfn = dlsym(libhandle, sym.name);
    }
  }
#endif

#if SCCL_NVML_DIRECT
  bool have_v2 = true;
#else
  // If this compare is done in the SCCL_NVML_DIRECT=1 case then GCC warns about it never being null.
  bool have_v2 = pfn_nvmlInit_v2 != nullptr;
#endif

  nvmlReturn_t res1 = (have_v2 ? pfn_nvmlInit_v2 : pfn_nvmlInit)();
  if(res1 != NVML_SUCCESS) {
    WARN("nvmlInit%s() failed: %s", have_v2 ? "_v2" : "", pfn_nvmlErrorString(res1));
    initResult = scclSystemError;
    return initResult;
  }

  unsigned int ndev;
  res1 = (have_v2 ? pfn_nvmlDeviceGetCount_v2 : pfn_nvmlDeviceGetCount)(&ndev);
  if(res1 != NVML_SUCCESS) {
    WARN("nvmlDeviceGetCount%s() failed: %s", have_v2 ?
"_v2" : "", pfn_nvmlErrorString(res1)); initResult = scclSystemError; return initResult; } scclNvmlDeviceCount = int(ndev); if(scclNvmlMaxDevices < scclNvmlDeviceCount) { WARN("nvmlDeviceGetCount() reported more devices (%d) than the internal maximum (scclNvmlMaxDevices=%d)", scclNvmlDeviceCount, scclNvmlMaxDevices); initResult = scclInternalError; return initResult; } for(int a = 0; a < scclNvmlDeviceCount; a++) { res1 = pfn_nvmlDeviceGetHandleByIndex(a, &scclNvmlDevices[a].handle); if(res1 != NVML_SUCCESS) { WARN("nvmlDeviceGetHandleByIndex(%d) failed: %s", int(a), pfn_nvmlErrorString(res1)); initResult = scclSystemError; return initResult; } res1 = pfn_nvmlDeviceGetCudaComputeCapability( scclNvmlDevices[a].handle, &scclNvmlDevices[a].computeCapabilityMajor, &scclNvmlDevices[a].computeCapabilityMinor); if(res1 != NVML_SUCCESS) { WARN("nvmlDeviceGetCudaComputeCapability(%d) failed: %s", int(a), pfn_nvmlErrorString(res1)); initResult = scclSystemError; return initResult; } } for(int a = 0; a < scclNvmlDeviceCount; a++) { for(int b = 0; b < scclNvmlDeviceCount; b++) { nvmlDevice_t da = scclNvmlDevices[a].handle; nvmlDevice_t db = scclNvmlDevices[b].handle; res1 = pfn_nvmlDeviceGetP2PStatus(da, db, NVML_P2P_CAPS_INDEX_READ, &scclNvmlDevicePairs[a][b].p2pStatusRead); if(res1 != NVML_SUCCESS) { WARN("nvmlDeviceGetP2PStatus(%d,%d,NVML_P2P_CAPS_INDEX_READ) failed: %s", a, b, pfn_nvmlErrorString(res1)); initResult = scclSystemError; return initResult; } res1 = pfn_nvmlDeviceGetP2PStatus(da, db, NVML_P2P_CAPS_INDEX_WRITE, &scclNvmlDevicePairs[a][b].p2pStatusWrite); if(res1 != NVML_SUCCESS) { WARN("nvmlDeviceGetP2PStatus(%d,%d,NVML_P2P_CAPS_INDEX_READ) failed: %s", a, b, pfn_nvmlErrorString(res1)); initResult = scclSystemError; return initResult; } } } initResult = scclSuccess; return initResult; } #define NVMLCHECK(name, ...) \ do { \ nvmlReturn_t e44241808 = pfn_##name(__VA_ARGS__); \ if(e44241808 != NVML_SUCCESS) { \ WARN(#name "() failed: %s", pfn_nvmlErrorString(e44241808)); \ return scclSystemError; \ } \ } while(0) #define NVMLTRY(name, ...) 
#define NVMLTRY(name, ...)                                                           \
  do {                                                                               \
    if(!SCCL_NVML_DIRECT && pfn_##name == nullptr)                                   \
      return scclInternalError; /* missing symbol is not a warned error */           \
    nvmlReturn_t e44241808 = pfn_##name(__VA_ARGS__);                                \
    if(e44241808 != NVML_SUCCESS) {                                                  \
      if(e44241808 != NVML_ERROR_NOT_SUPPORTED)                                      \
        INFO(SCCL_LOG_TOPO, #name "() failed: %s", pfn_nvmlErrorString(e44241808));  \
      return scclSystemError;                                                        \
    }                                                                                \
  } while(0)

scclResult_t scclNvmlDeviceGetHandleByPciBusId(const char* pciBusId, nvmlDevice_t* device) {
  SCCLCHECK(scclNvmlEnsureInitialized());
  std::lock_guard<std::mutex> locked(lock);
  NVMLCHECK(nvmlDeviceGetHandleByPciBusId, pciBusId, device);
  return scclSuccess;
}

scclResult_t scclNvmlDeviceGetHandleByIndex(unsigned int index, nvmlDevice_t* device) {
  SCCLCHECK(scclNvmlEnsureInitialized());
  *device = scclNvmlDevices[index].handle;
  return scclSuccess;
}

scclResult_t scclNvmlDeviceGetIndex(nvmlDevice_t device, unsigned* index) {
  SCCLCHECK(scclNvmlEnsureInitialized());
  for(int d = 0; d < scclNvmlDeviceCount; d++) {
    if(scclNvmlDevices[d].handle == device) {
      *index = d;
      return scclSuccess;
    }
  }
  return scclInvalidArgument;
}

scclResult_t scclNvmlDeviceGetNvLinkState(nvmlDevice_t device, unsigned int link, nvmlEnableState_t* isActive) {
  SCCLCHECK(scclNvmlEnsureInitialized());
  std::lock_guard<std::mutex> locked(lock);
  NVMLTRY(nvmlDeviceGetNvLinkState, device, link, isActive);
  return scclSuccess;
}

scclResult_t scclNvmlDeviceGetNvLinkRemotePciInfo(nvmlDevice_t device, unsigned int link, nvmlPciInfo_t* pci) {
  SCCLCHECK(scclNvmlEnsureInitialized());
  std::lock_guard<std::mutex> locked(lock);
  NVMLTRY(nvmlDeviceGetNvLinkRemotePciInfo, device, link, pci);
  return scclSuccess;
}

scclResult_t scclNvmlDeviceGetNvLinkCapability(
  nvmlDevice_t device, unsigned int link, nvmlNvLinkCapability_t capability, unsigned int* capResult) {
  SCCLCHECK(scclNvmlEnsureInitialized());
  std::lock_guard<std::mutex> locked(lock);
  NVMLTRY(nvmlDeviceGetNvLinkCapability, device, link, capability, capResult);
  return scclSuccess;
}

scclResult_t scclNvmlDeviceGetCudaComputeCapability(nvmlDevice_t device, int* major, int* minor) {
  SCCLCHECK(scclNvmlEnsureInitialized());
  for(int d = 0; d < scclNvmlDeviceCount; d++) {
    if(device == scclNvmlDevices[d].handle) {
      *major = scclNvmlDevices[d].computeCapabilityMajor;
      *minor = scclNvmlDevices[d].computeCapabilityMinor;
      return scclSuccess;
    }
  }
  return scclInvalidArgument;
}

scclResult_t scclNvmlDeviceGetP2PStatus(
  nvmlDevice_t device1, nvmlDevice_t device2, nvmlGpuP2PCapsIndex_t p2pIndex, nvmlGpuP2PStatus_t* p2pStatus) {
  SCCLCHECK(scclNvmlEnsureInitialized());
  if(p2pIndex == NVML_P2P_CAPS_INDEX_READ || p2pIndex == NVML_P2P_CAPS_INDEX_WRITE) {
    // Read and write P2P status were cached during initialization.
    int a = -1, b = -1;
    for(int d = 0; d < scclNvmlDeviceCount; d++) {
      if(device1 == scclNvmlDevices[d].handle) a = d;
      if(device2 == scclNvmlDevices[d].handle) b = d;
    }
    if(a == -1 || b == -1) return scclInvalidArgument;
    if(p2pIndex == NVML_P2P_CAPS_INDEX_READ)
      *p2pStatus = scclNvmlDevicePairs[a][b].p2pStatusRead;
    else
      *p2pStatus = scclNvmlDevicePairs[a][b].p2pStatusWrite;
  } else {
    std::lock_guard<std::mutex> locked(lock);
    NVMLCHECK(nvmlDeviceGetP2PStatus, device1, device2, p2pIndex, p2pStatus);
  }
  return scclSuccess;
}

scclResult_t scclNvmlDeviceGetFieldValues(nvmlDevice_t device, int valuesCount, nvmlFieldValue_t* values) {
  SCCLCHECK(scclNvmlEnsureInitialized());
  std::lock_guard<std::mutex> locked(lock);
  NVMLTRY(nvmlDeviceGetFieldValues, device, valuesCount, values);
  return scclSuccess;
}

} // namespace topology
} // namespace hardware
} // namespace sccl
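// Usage sketch (hypothetical caller, not part of this file). Every wrapper calls
// scclNvmlEnsureInitialized() itself, so no explicit setup is needed; cached queries
// such as P2P read/write status never re-enter NVML:
//
//   nvmlDevice_t dev0, dev1;
//   nvmlGpuP2PStatus_t status;
//   if(scclNvmlDeviceGetHandleByIndex(0, &dev0) == scclSuccess &&
//      scclNvmlDeviceGetHandleByIndex(1, &dev1) == scclSuccess &&
//      scclNvmlDeviceGetP2PStatus(dev0, dev1, NVML_P2P_CAPS_INDEX_READ, &status) == scclSuccess) {
//     bool p2pReadOk = (status == NVML_P2P_STATUS_OK);
//   }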