Unverified Commit b111aa5a authored by Daniel Hiltgen's avatar Daniel Hiltgen Committed by GitHub
Browse files

Debug logging for nvcuda init (#7532)

Some users are reporting crashes during nvcuda.dll initialization
on windows.  This should help narrow down where things are going bad.
parent 9e83e550
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include "gpu_info_nvcuda.h" #include "gpu_info_nvcuda.h"
void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) { void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
LOG(resp->ch.verbose, "initializing %s\n", nvcuda_lib_path);
CUresult ret; CUresult ret;
resp->err = NULL; resp->err = NULL;
resp->num_devices = 0; resp->num_devices = 0;
...@@ -57,8 +58,10 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) { ...@@ -57,8 +58,10 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
resp->cudaErr = -1; resp->cudaErr = -1;
return; return;
} }
LOG(resp->ch.verbose, "dlsym: %s - %p\n", l[i].s, *l[i].p);
} }
LOG(resp->ch.verbose, "calling cuInit\n");
ret = (*resp->ch.cuInit)(0); ret = (*resp->ch.cuInit)(0);
if (ret != CUDA_SUCCESS) { if (ret != CUDA_SUCCESS) {
LOG(resp->ch.verbose, "cuInit err: %d\n", ret); LOG(resp->ch.verbose, "cuInit err: %d\n", ret);
...@@ -75,15 +78,18 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) { ...@@ -75,15 +78,18 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
resp->ch.driver_minor = 0; resp->ch.driver_minor = 0;
// Report driver version if we're in verbose mode, ignore errors // Report driver version if we're in verbose mode, ignore errors
LOG(resp->ch.verbose, "calling cuDriverGetVersion\n");
ret = (*resp->ch.cuDriverGetVersion)(&version); ret = (*resp->ch.cuDriverGetVersion)(&version);
if (ret != CUDA_SUCCESS) { if (ret != CUDA_SUCCESS) {
LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret); LOG(resp->ch.verbose, "cuDriverGetVersion failed: %d\n", ret);
} else { } else {
LOG(resp->ch.verbose, "raw version 0x%x\n", version);
resp->ch.driver_major = version / 1000; resp->ch.driver_major = version / 1000;
resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10; resp->ch.driver_minor = (version - (resp->ch.driver_major * 1000)) / 10;
LOG(resp->ch.verbose, "CUDA driver version: %d.%d\n", resp->ch.driver_major, resp->ch.driver_minor); LOG(resp->ch.verbose, "CUDA driver version: %d.%d\n", resp->ch.driver_major, resp->ch.driver_minor);
} }
LOG(resp->ch.verbose, "calling cuDeviceGetCount\n");
ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices); ret = (*resp->ch.cuDeviceGetCount)(&resp->num_devices);
if (ret != CUDA_SUCCESS) { if (ret != CUDA_SUCCESS) {
LOG(resp->ch.verbose, "cuDeviceGetCount err: %d\n", ret); LOG(resp->ch.verbose, "cuDeviceGetCount err: %d\n", ret);
...@@ -94,6 +100,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) { ...@@ -94,6 +100,7 @@ void nvcuda_init(char *nvcuda_lib_path, nvcuda_init_resp_t *resp) {
resp->cudaErr = ret; resp->cudaErr = ret;
return; return;
} }
LOG(resp->ch.verbose, "device count %d\n", resp->num_devices);
} }
const int buflen = 256; const int buflen = 256;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment