Commit 6e953c61 authored by zsccc's avatar zsccc
Browse files

Update compiler.bak.py, compiler.py files

parents
# /root/Documents/bigdata/Anaconda3/envs/pynx/lib/python3.10/site-packages/pycuda/compiler.py
from pytools import memoize
# don't import pycuda.driver here--you'll create an import loop
import os
import sys
from tempfile import mkstemp
from os import unlink
from pytools.prefork import call_capture_output
@memoize
def get_nvcc_version(nvcc):
    """Return the output of ``nvcc --version`` as a string (cached).

    :param nvcc: path or name of the nvcc executable.
    :returns: decoded version text; a placeholder string (after a warning)
        when the compiler cannot be queried, so cache-key computation in
        ``compile_plain`` never fails.
    """
    cmdline = [nvcc, "--version"]
    result, stdout, stderr = call_capture_output(cmdline)
    if result != 0 or not stdout:
        from warnings import warn

        warn("NVCC version could not be determined.")
        # BUGFIX: must be bytes -- it is .decode()d below.  The plain str
        # literal used previously raised AttributeError on this path.
        stdout = b"nvcc unknown version"
    return stdout.decode("utf-8", "replace")
def _new_md5():
try:
import hashlib
return hashlib.md5()
except ImportError:
# for Python << 2.5
import md5
return md5.new()
def preprocess_source(source, options, nvcc):
    """Run *source* through ``nvcc --preprocess`` and return the result text.

    The temporary file's name is stripped from the output so hashes of the
    preprocessed text are reproducible.  On failure the temporary file is
    intentionally left behind so the error message refers to an existing file.

    :param source: CUDA source text.
    :param options: list of extra nvcc command-line options.
    :param nvcc: path or name of the nvcc executable.
    :raises pycuda.driver.CompileError: if preprocessing fails or produces
        implausibly little output (likely an unsupported compiler).
    """
    handle, source_path = mkstemp(suffix=".cu")
    # BUGFIX: write through the descriptor mkstemp already opened instead of
    # opening the path a second time and closing the raw handle separately.
    with os.fdopen(handle, "w") as outf:
        outf.write(source)

    cmdline = [nvcc, "--preprocess"] + options + [source_path]
    if "win32" in sys.platform:
        cmdline.extend(["--compiler-options", "-EP"])
    else:
        cmdline.extend(["--compiler-options", "-P"])

    result, stdout, stderr = call_capture_output(cmdline, error_on_nonzero=False)
    if result != 0:
        from pycuda.driver import CompileError

        raise CompileError(
            "nvcc preprocessing of %s failed" % source_path, cmdline, stderr=stderr
        )

    # sanity check: preprocessing should not shrink the code dramatically
    if len(stdout) < 0.5 * len(source):
        from pycuda.driver import CompileError

        raise CompileError(
            "nvcc preprocessing of %s failed with ridiculously "
            "small code output - likely unsupported compiler." % source_path,
            cmdline,
            stderr=stderr.decode("utf-8", "replace"),
        )

    unlink(source_path)

    preprocessed_str = stdout.decode("utf-8", "replace")
    # remove the temporary filename from the preprocessed source code
    # to get reproducible hashes
    return preprocessed_str.replace(os.path.basename(source_path), "")
def compile_plain(source, options, keep, nvcc, cache_dir, target="cubin"):
    """Compile *source* with nvcc and return the resulting binary as bytes.

    :param source: CUDA source text.
    :param options: nvcc command-line options (list of strings).
    :param keep: keep the temporary build directory and pass ``--keep``.
    :param nvcc: path or name of the nvcc executable.
    :param cache_dir: directory for the compile cache; falsy disables caching.
    :param target: one of ``"cubin"``, ``"ptx"``, ``"fatbin"``.
    :raises pycuda.driver.CompileError: when nvcc fails.
    """
    from os.path import join

    assert target in ["cubin", "ptx", "fatbin"]

    if cache_dir:
        # Cache key covers the (preprocessed) source, options, compiler
        # version and platform word size so stale binaries are never reused.
        checksum = _new_md5()
        if "#include" in source:
            # Preprocess so changes in included headers invalidate the cache.
            checksum.update(preprocess_source(source, options, nvcc).encode("utf-8"))
        else:
            checksum.update(source.encode("utf-8"))

        for option in options:
            checksum.update(option.encode("utf-8"))
        checksum.update(get_nvcc_version(nvcc).encode("utf-8"))
        from pycuda.characterize import platform_bits

        checksum.update(str(platform_bits()).encode("utf-8"))

        cache_file = checksum.hexdigest()
        cache_path = join(cache_dir, cache_file + "." + target)

        try:
            # Cache hit: return the previously compiled binary verbatim.
            cache_file = open(cache_path, "rb")
            try:
                return cache_file.read()
            finally:
                cache_file.close()
        except Exception:
            # Any cache problem (missing/unreadable file) falls through to
            # a fresh compilation.
            pass

    from tempfile import mkdtemp

    file_dir = mkdtemp()
    file_root = "kernel"

    cu_file_name = file_root + ".cu"
    cu_file_path = join(file_dir, cu_file_name)

    outf = open(cu_file_path, "w")
    outf.write(str(source))
    outf.close()

    if keep:
        # Copy options so the caller's list is not mutated.
        options = options[:]
        options.append("--keep")

        print("*** compiler output in %s" % file_dir)

    cmdline = [nvcc, "--" + target] + options + [cu_file_name]
    result, stdout, stderr = call_capture_output(
        cmdline, cwd=file_dir, error_on_nonzero=False
    )

    try:
        result_f = open(join(file_dir, file_root + "." + target), "rb")
    except OSError:
        no_output = True
    else:
        no_output = False

    # NOTE(review): if nvcc exits 0 with no output file and no diagnostics,
    # result_f below is unbound and a NameError would be raised instead of
    # a CompileError -- pre-existing upstream behavior, left unchanged.
    if result != 0 or (no_output and (stdout or stderr)):
        if result == 0:
            from warnings import warn

            warn(
                "PyCUDA: nvcc exited with status 0, but appears to have "
                "encountered an error"
            )
        from pycuda.driver import CompileError

        raise CompileError(
            "nvcc compilation of %s failed" % cu_file_path,
            cmdline,
            stdout=stdout.decode("utf-8", "replace"),
            stderr=stderr.decode("utf-8", "replace"),
        )

    if stdout or stderr:
        lcase_err_text = (stdout + stderr).decode("utf-8", "replace").lower()
        from warnings import warn

        if "demoted" in lcase_err_text or "demoting" in lcase_err_text:
            warn(
                "nvcc said it demoted types in source code it "
                "compiled--this is likely not what you want.",
                stacklevel=4,
            )
        warn(
            "The CUDA compiler succeeded, but said the following:\n"
            + (stdout + stderr).decode("utf-8", "replace"),
            stacklevel=4,
        )

    result_data = result_f.read()
    result_f.close()

    if cache_dir:
        # Populate the cache for the next call with the same key.
        outf = open(cache_path, "wb")
        outf.write(result_data)
        outf.close()

    if not keep:
        from os import listdir, unlink, rmdir

        for name in listdir(file_dir):
            unlink(join(file_dir, name))
        rmdir(file_dir)

    return result_data
def _get_per_user_string():
try:
from os import getuid
except ImportError:
checksum = _new_md5()
from os import environ
checksum.update(environ["USERNAME"].encode("utf-8"))
return checksum.hexdigest()
else:
return "uid%d" % getuid()
def _find_pycuda_include_path():
    """Return the absolute path of pycuda's bundled ``cuda`` header directory."""
    import importlib.util
    import os

    pkg_origin = importlib.util.find_spec("pycuda").origin
    # The headers live in the "cuda" directory next to the package's
    # __init__.py file.
    return os.path.abspath(os.path.join(os.path.dirname(pkg_origin), "cuda"))
# Extra nvcc flags taken from the environment.  str.split() with no argument
# already splits on runs of whitespace and drops empty tokens, so no
# per-token stripping/filtering is needed.
DEFAULT_NVCC_FLAGS = os.environ.get("PYCUDA_DEFAULT_NVCC_FLAGS", "").split()
def compile(
    source,
    nvcc="nvcc",
    options=None,
    keep=False,
    no_extern_c=False,
    arch=None,
    code=None,
    cache_dir=None,
    include_dirs=[],
    target="cubin",
):
    """Compile CUDA *source* to the requested *target* and return the bytes.

    Prepares the nvcc option list (arch/code selection, debugging flags,
    word-size flag, include paths, cache directory resolution) and delegates
    the actual compiler invocation to :func:`compile_plain`.

    :param source: CUDA source text.
    :param nvcc: path or name of the nvcc executable.
    :param options: nvcc options; None selects ``DEFAULT_NVCC_FLAGS``.
    :param keep: keep intermediate compiler output.
    :param no_extern_c: when False, the source is wrapped in ``extern "C"``.
    :param arch: target architecture (e.g. ``"sm_75"``); autodetected from
        the current context when None.
    :param code: value for nvcc's ``-code`` option.
    :param cache_dir: compile-cache directory; None means autodetect,
        False disables caching.
    :param include_dirs: extra include directories.  NOTE: mutable default
        is safe here because the list is never mutated (a new list is built).
    :param target: one of ``"cubin"``, ``"ptx"``, ``"fatbin"``.
    """
    assert target in ["cubin", "ptx", "fatbin"]
    if not no_extern_c:
        source = 'extern "C" {\n%s\n}\n' % source

    if options is None:
        options = DEFAULT_NVCC_FLAGS

    # Copy so the caller's (or the module-default) list is never mutated.
    options = options[:]
    if arch is None:
        from pycuda.driver import Error

        try:
            from pycuda.driver import Context

            arch = "sm_%d%d" % Context.get_device().compute_capability()
        except Error:
            # No active context -- let nvcc pick its default architecture.
            pass

    from pycuda.driver import CUDA_DEBUGGING

    if CUDA_DEBUGGING:
        # Debug builds: disable caching, keep intermediates, add device debug.
        cache_dir = False
        keep = True
        options.extend(["-g", "-G"])

    if "PYCUDA_CACHE_DIR" in os.environ and cache_dir is None:
        cache_dir = os.environ["PYCUDA_CACHE_DIR"]

    if "PYCUDA_DISABLE_CACHE" in os.environ:
        cache_dir = False

    if cache_dir is None:
        import platformdirs

        cache_dir = os.path.join(
            platformdirs.user_cache_dir("pycuda", "pycuda"), "compiler-cache-v1"
        )

        from os import makedirs

        makedirs(cache_dir, exist_ok=True)

    if arch is not None:
        options.extend(["-arch", arch])

    if code is not None:
        options.extend(["-code", code])

    # Match nvcc's word size to the running interpreter's.
    if "darwin" in sys.platform and sys.maxsize == 9223372036854775807:
        options.append("-m64")
    elif "win32" in sys.platform and sys.maxsize == 9223372036854775807:
        options.append("-m64")
    elif "win32" in sys.platform and sys.maxsize == 2147483647:
        options.append("-m32")

    include_dirs = include_dirs + [_find_pycuda_include_path()]

    for i in include_dirs:
        options.append("-I" + i)

    return compile_plain(source, options, keep, nvcc, cache_dir, target)
class CudaModule:
    """Common behavior shared by the module-building wrapper classes."""

    def _check_arch(self, arch):
        """Warn if *arch* targets a higher compute capability than the GPU.

        Best effort only: any failure (no driver, no context, odd arch
        string) is silently ignored so compilation is never blocked.
        """
        if arch is None:
            return

        try:
            from pycuda.driver import Context

            capability = Context.get_device().compute_capability()
            requested = tuple(map(int, tuple(arch.split("_")[1])))
            if requested > capability:
                from warnings import warn

                warn(
                    "trying to compile for a compute capability "
                    "higher than selected GPU"
                )
        except Exception:
            pass

    def _bind_module(self):
        # Re-export the underlying module's accessors on this wrapper.
        self.get_global = self.module.get_global
        self.get_texref = self.module.get_texref
        if hasattr(self.module, "get_surfref"):
            self.get_surfref = self.module.get_surfref

    def get_function(self, name):
        """Return the kernel named *name* from the underlying module."""
        return self.module.get_function(name)
class SourceModule(CudaModule):
    """
    Creates a Module from a single .cu source object linked against the
    static CUDA runtime.
    """

    def __init__(
        self,
        source,
        nvcc="nvcc",
        options=None,
        keep=False,
        no_extern_c=False,
        arch=None,
        code=None,
        cache_dir=None,
        include_dirs=[],
    ):
        # Parameters mirror compile(); see that function for their meaning.
        # Warn early if arch exceeds the current GPU's capability.
        self._check_arch(arch)

        # Compile to a cubin and load it into the current CUDA context.
        cubin = compile(
            source,
            nvcc,
            options,
            keep,
            no_extern_c,
            arch,
            code,
            cache_dir,
            include_dirs,
        )

        from pycuda.driver import module_from_buffer

        self.module = module_from_buffer(cubin)

        # Expose get_global/get_texref (and get_surfref if available).
        self._bind_module()
def _search_on_path(filenames):
"""Find file on system path."""
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52224
from os.path import exists, abspath, join
from os import pathsep, environ
search_path = environ["PATH"]
paths = search_path.split(pathsep)
for path in paths:
for filename in filenames:
if exists(join(path, filename)):
return abspath(join(path, filename))
@memoize
def _find_nvcc_on_path():
    """Locate the nvcc executable on $PATH, or None (result is cached)."""
    candidates = ["nvcc", "nvcc.exe"]
    return _search_on_path(candidates)
class DynamicModule(CudaModule):
    """
    Creates a Module from multiple .cu source, library file and/or data
    objects linked against the static or dynamic CUDA runtime.
    """

    def __init__(
        self,
        nvcc="nvcc",
        link_options=None,
        keep=False,
        no_extern_c=False,
        arch=None,
        code=None,
        cache_dir=None,
        include_dirs=[],
        message_handler=None,
        log_verbose=False,
        cuda_libdir=None,
    ):
        from pycuda.driver import Context

        # Dynamic parallelism requires compute capability >= 3.5.
        compute_capability = Context.get_device().compute_capability()
        if compute_capability < (3, 5):
            raise Exception(
                "Minimum compute capability for dynamic parallelism is 3.5 (found: %u.%u)!"
                % (compute_capability[0], compute_capability[1])
            )
        else:
            from pycuda.driver import Linker

            self.linker = Linker(message_handler, link_options, log_verbose)
        self._check_arch(arch)
        # Compilation parameters, reused by every add_source() call.
        self.nvcc = nvcc
        self.keep = keep
        self.no_extern_c = no_extern_c
        self.arch = arch
        self.code = code
        self.cache_dir = cache_dir
        self.include_dirs = include_dirs
        self.cuda_libdir = cuda_libdir
        # Lazily resolved by add_stdlib() via _locate_cuda_libdir().
        self.libdir, self.libptn = None, None
        # Set by link(); None until then.
        self.module = None

    def _locate_cuda_libdir(self):
        """
        Locate the "standard" CUDA SDK library directory in the local
        file system. Supports 64-Bit Windows, Linux and Mac OS X.
        In case the caller supplied cuda_libdir in the constructor
        other than None that value is returned unchecked, else a
        best-effort attempt is made.
        Precedence:
            Windows: cuda_libdir > %CUDA_PATH%
            Linux:   cuda_libdir > $CUDA_ROOT > $LD_LIBRARY_PATH > '/usr/lib/x86_64-linux-gnu'
        Returns a pair (libdir, libptn) where libdir is None in case
        of failure or a string containing the absolute path of the
        directory, and libptn is the %-format pattern to construct
        library file names from library names on the local system.
        Raises a RuntimeError in case of failure.
        Links:
        - Post-installation Actions
          http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions
        TODO:
        - Is $CUDA_ROOT/lib64 the correct path to assume for 64-Bit CUDA libraries on Linux?
        - Mac OS X (Darwin) is currently treated like Linux, is that correct?
        - Check CMake's FindCUDA module, it might contain some helpful clues in its sources
          https://cmake.org/cmake/help/v3.0/module/FindCUDA.html
          https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA.cmake
        - Verify all Linux code paths somehow
        """
        from os.path import isfile, join
        from platform import system as platform_system

        system = platform_system()
        libdir, libptn = None, None
        if system == "Windows":
            if self.cuda_libdir is not None:
                libdir = self.cuda_libdir
            elif "CUDA_PATH" in os.environ and isfile(
                join(os.environ["CUDA_PATH"], "lib\\x64\\cudadevrt.lib")
            ):
                libdir = join(os.environ["CUDA_PATH"], "lib\\x64")
            libptn = "%s.lib"
        elif system in ["Linux", "Darwin"]:
            if self.cuda_libdir is not None:
                libdir = self.cuda_libdir
            elif "CUDA_ROOT" in os.environ and isfile(
                join(os.environ["CUDA_ROOT"], "lib64/libcudadevrt.a")
            ):
                libdir = join(os.environ["CUDA_ROOT"], "lib64")
            elif "LD_LIBRARY_PATH" in os.environ:
                # Scan LD_LIBRARY_PATH for the device-runtime archive.
                for ld_path in os.environ["LD_LIBRARY_PATH"].split(":"):
                    if isfile(join(ld_path, "libcudadevrt.a")):
                        libdir = ld_path
                        break

            if libdir is None and isfile("/usr/lib/x86_64-linux-gnu/libcudadevrt.a"):
                # Distribution-packaged CUDA (e.g. Debian/Ubuntu).
                libdir = "/usr/lib/x86_64-linux-gnu"

            if libdir is None:
                # Last resort: derive the lib dir from nvcc's location.
                nvcc_path = _find_nvcc_on_path()
                if nvcc_path is not None:
                    libdir = join(os.path.dirname(nvcc_path), "..", "lib64")

            libptn = "lib%s.a"
        if libdir is None:
            raise RuntimeError(
                "Unable to locate the CUDA SDK installation "
                "directory, set CUDA library path manually"
            )
        return libdir, libptn

    def add_source(self, source, nvcc_options=None, name="kernel.ptx"):
        """Compile *source* to PTX and add it to the pending link (fluent)."""
        ptx = compile(
            source,
            nvcc=self.nvcc,
            options=nvcc_options,
            keep=self.keep,
            no_extern_c=self.no_extern_c,
            arch=self.arch,
            code=self.code,
            cache_dir=self.cache_dir,
            include_dirs=self.include_dirs,
            target="ptx",
        )
        from pycuda.driver import jit_input_type

        self.linker.add_data(ptx, jit_input_type.PTX, name)
        return self

    def add_data(self, data, input_type, name="unknown"):
        """Add a raw data blob of *input_type* to the pending link (fluent)."""
        self.linker.add_data(data, input_type, name)
        return self

    def add_file(self, filename, input_type):
        """Add a file of *input_type* to the pending link (fluent)."""
        self.linker.add_file(filename, input_type)
        return self

    def add_stdlib(self, libname):
        """Add the CUDA SDK library *libname* (e.g. "cudadevrt") to the link.

        :raises OSError: if the library file cannot be found in the located
            SDK library directory.
        """
        if self.libdir is None:
            # Resolve (and cache) the SDK library directory on first use.
            self.libdir, self.libptn = self._locate_cuda_libdir()
        from os.path import isfile, join

        libpath = join(self.libdir, self.libptn % libname)
        if not isfile(libpath):
            raise OSError('CUDA SDK library file "%s" not found' % libpath)
        from pycuda.driver import jit_input_type

        self.linker.add_file(libpath, jit_input_type.LIBRARY)
        return self

    def link(self):
        """Finalize the link, bind the resulting module and return self."""
        self.module = self.linker.link_module()
        # The linker is single-use; drop it once the module exists.
        self.linker = None
        self._bind_module()
        return self
class DynamicSourceModule(DynamicModule):
    """
    Creates a Module from a single .cu source object linked against the
    dynamic CUDA runtime.
    - compiler generates PTX relocatable device code (rdc) from source that
      can be linked with other relocatable device code
    - source is linked against the CUDA device runtime library cudadevrt
    - library cudadevrt is statically linked into the generated Module
    """

    def __init__(
        self,
        source,
        nvcc="nvcc",
        options=None,
        keep=False,
        no_extern_c=False,
        arch=None,
        code=None,
        cache_dir=None,
        include_dirs=[],
        cuda_libdir=None,
    ):
        super().__init__(
            nvcc=nvcc,
            link_options=None,
            keep=keep,
            no_extern_c=no_extern_c,
            arch=arch,
            code=code,
            cache_dir=cache_dir,
            include_dirs=include_dirs,
            cuda_libdir=cuda_libdir,
        )
        if options is None:
            options = DEFAULT_NVCC_FLAGS
        # Copy before appending so the caller's list is not mutated.
        options = options[:]
        # Relocatable device code is required for dynamic parallelism.
        if "-rdc=true" not in options:
            options.append("-rdc=true")
        if "-lcudadevrt" not in options:
            options.append("-lcudadevrt")
        # Compile, link against the device runtime, and finalize the module.
        self.add_source(source, nvcc_options=options)
        self.add_stdlib("cudadevrt")
        self.link()
# /root/anaconda3/envs/pynx/lib/python3.10/site-packages/pycuda-2024.1.2-py3.10-linux-x86_64.egg/pycuda/compiler.py
from pytools import memoize
# don't import pycuda.driver here--you'll create an import loop
import os
import sys
from tempfile import mkstemp
from os import unlink
from pytools.prefork import call_capture_output
@memoize
def get_nvcc_version(nvcc):
    """Return the output of ``nvcc --version`` (cached per compiler path).

    Falls back to a placeholder (with a warning) when the compiler cannot
    be queried, so cache-key computation never fails.
    """
    result, stdout, stderr = call_capture_output([nvcc, "--version"])
    if result != 0 or not stdout:
        from warnings import warn

        warn("NVCC version could not be determined.")
        stdout = b"nvcc unknown version"
    return stdout.decode("utf-8", "replace")
def _new_md5():
try:
import hashlib
return hashlib.md5()
except ImportError:
# for Python << 2.5
import md5
return md5.new()
def preprocess_source(source, options, nvcc):
    """Run *source* through ``nvcc --preprocess`` and return the result text.

    The temporary file's name is stripped from the output so hashes of the
    preprocessed text are reproducible.  On failure the temporary file is
    intentionally left behind so the error message refers to an existing file.

    :param source: CUDA source text.
    :param options: list of extra nvcc command-line options.
    :param nvcc: path or name of the nvcc executable.
    :raises pycuda.driver.CompileError: if preprocessing fails or produces
        implausibly little output (likely an unsupported compiler).
    """
    handle, source_path = mkstemp(suffix=".cu")
    # BUGFIX: write through the descriptor mkstemp already opened instead of
    # opening the path a second time and closing the raw handle separately.
    with os.fdopen(handle, "w") as outf:
        outf.write(source)

    cmdline = [nvcc, "--preprocess"] + options + [source_path]
    if "win32" in sys.platform:
        cmdline.extend(["--compiler-options", "-EP"])
    else:
        cmdline.extend(["--compiler-options", "-P"])

    result, stdout, stderr = call_capture_output(cmdline, error_on_nonzero=False)
    if result != 0:
        from pycuda.driver import CompileError

        raise CompileError(
            "nvcc preprocessing of %s failed" % source_path, cmdline, stderr=stderr
        )

    # sanity check: preprocessing should not shrink the code dramatically
    if len(stdout) < 0.5 * len(source):
        from pycuda.driver import CompileError

        raise CompileError(
            "nvcc preprocessing of %s failed with ridiculously "
            "small code output - likely unsupported compiler." % source_path,
            cmdline,
            stderr=stderr.decode("utf-8", "replace"),
        )

    unlink(source_path)

    preprocessed_str = stdout.decode("utf-8", "replace")
    # remove the temporary filename from the preprocessed source code
    # to get reproducible hashes
    return preprocessed_str.replace(os.path.basename(source_path), "")
def compile_plain(source, options, keep, nvcc, cache_dir, target="cubin"):
    """Compile *source* and return the resulting binary as bytes.

    GPUfusion variant: the compiler is expected to be a HIP toolchain, so
    the output file it looks for is the HIP fat binary / bitcode file
    rather than nvcc's ``kernel.cubin`` / ``kernel.ptx``.

    :param source: CUDA/HIP source text.
    :param options: compiler command-line options (list of strings).
    :param keep: keep the temporary build directory and pass ``--keep``.
    :param nvcc: path or name of the compiler executable.
    :param cache_dir: directory for the compile cache; falsy disables caching.
    :param target: one of ``"cubin"``, ``"ptx"``, ``"fatbin"``.
    :raises pycuda.driver.CompileError: when compilation fails.
    """
    from os.path import join

    assert target in ["cubin", "ptx", "fatbin"]

    if cache_dir:
        # Cache key covers the (preprocessed) source, options, compiler
        # version and platform word size so stale binaries are never reused.
        checksum = _new_md5()
        if "#include" in source:
            # Preprocess so changes in included headers invalidate the cache.
            checksum.update(preprocess_source(source, options, nvcc).encode("utf-8"))
        else:
            checksum.update(source.encode("utf-8"))

        for option in options:
            checksum.update(option.encode("utf-8"))
        checksum.update(get_nvcc_version(nvcc).encode("utf-8"))
        from pycuda.characterize import platform_bits

        checksum.update(str(platform_bits()).encode("utf-8"))

        cache_file = checksum.hexdigest()
        cache_path = join(cache_dir, cache_file + "." + target)

        try:
            # Cache hit: return the previously compiled binary verbatim.
            cache_file = open(cache_path, "rb")
            try:
                return cache_file.read()
            finally:
                cache_file.close()
        except Exception:
            # Any cache problem (missing/unreadable file) falls through to
            # a fresh compilation.
            pass

    from tempfile import mkdtemp

    file_dir = mkdtemp()
    file_root = "kernel"

    cu_file_name = file_root + ".cu"
    cu_file_path = join(file_dir, cu_file_name)

    outf = open(cu_file_path, "w")
    outf.write(str(source))
    outf.close()

    if keep:
        # Copy options so the caller's list is not mutated.
        options = options[:]
        options.append("--keep")

        print("*** compiler output in %s" % file_dir)

    cmdline = [nvcc, "--" + target] + options + [cu_file_name]
    result, stdout, stderr = call_capture_output(
        cmdline, cwd=file_dir, error_on_nonzero=False
    )

    try:
        # GPUfusion: kernel.cubin <=> kernel.cu-hip-amdgcn-amd-amdhsa.hipfb
        # NOTE(review): the gfx906 target and the debug print() calls below
        # are hard-coded GPUfusion changes -- presumably only valid for that
        # one AMD GPU architecture; confirm before wider use.
        if target == "cubin":
            result_f = open(join(file_dir, cu_file_name + "-hip-amdgcn-amd-amdhsa.hipfb"), "rb")
            print("compile file")
            print(join(file_dir, cu_file_name + "-hip-amdgcn-amd-amdhsa.hipfb"))
        # GPUfusion: kernel.ptx <=> kernel-hip-amdgcn-amd-amdhsa-gfx906.bc
        elif target == "ptx":
            result_f = open(join(file_dir, file_root + "-hip-amdgcn-amd-amdhsa-gfx906.bc"), "rb")
            print("compile file")
            print(join(file_dir, file_root + "-hip-amdgcn-amd-amdhsa-gfx906.bc"))
        else:
            result_f = open(join(file_dir, file_root + "." + target), "rb")
            print("compile file")
            print(join(file_dir, file_root + "." + target))
    except OSError:
        no_output = True
    else:
        no_output = False

    # NOTE(review): if the compiler exits 0 with no output file and no
    # diagnostics, result_f below is unbound and a NameError would be raised
    # instead of a CompileError -- pre-existing behavior, left unchanged.
    if result != 0 or (no_output and (stdout or stderr)):
        if result == 0:
            from warnings import warn

            warn(
                "PyCUDA: nvcc exited with status 0, but appears to have "
                "encountered an error"
            )
        from pycuda.driver import CompileError

        raise CompileError(
            "nvcc compilation of %s failed" % cu_file_path,
            cmdline,
            stdout=stdout.decode("utf-8", "replace"),
            stderr=stderr.decode("utf-8", "replace"),
        )

    if stdout or stderr:
        lcase_err_text = (stdout + stderr).decode("utf-8", "replace").lower()
        from warnings import warn

        if "demoted" in lcase_err_text or "demoting" in lcase_err_text:
            warn(
                "nvcc said it demoted types in source code it "
                "compiled--this is likely not what you want.",
                stacklevel=4,
            )
        warn(
            "The CUDA compiler succeeded, but said the following:\n"
            + (stdout + stderr).decode("utf-8", "replace"),
            stacklevel=4,
        )

    result_data = result_f.read()
    result_f.close()

    if cache_dir:
        # Populate the cache for the next call with the same key.
        outf = open(cache_path, "wb")
        outf.write(result_data)
        outf.close()

    if not keep:
        from os import listdir, unlink, rmdir

        for name in listdir(file_dir):
            unlink(join(file_dir, name))
        rmdir(file_dir)

    return result_data
def _get_per_user_string():
try:
from os import getuid
except ImportError:
checksum = _new_md5()
from os import environ
checksum.update(environ["USERNAME"].encode("utf-8"))
return checksum.hexdigest()
else:
return "uid%d" % getuid()
def _find_pycuda_include_path():
    """Return the absolute path of pycuda's bundled ``cuda`` header directory."""
    import importlib.util
    import os

    pkg_origin = importlib.util.find_spec("pycuda").origin
    # The headers live in the "cuda" directory next to the package's
    # __init__.py file.
    return os.path.abspath(os.path.join(os.path.dirname(pkg_origin), "cuda"))
# Extra nvcc flags taken from the environment.  str.split() with no argument
# splits on runs of whitespace and drops empty tokens, making the per-token
# strip/filter of the original formulation unnecessary.
DEFAULT_NVCC_FLAGS = os.environ.get("PYCUDA_DEFAULT_NVCC_FLAGS", "").split()
def compile(
    source,
    nvcc="nvcc",
    options=None,
    keep=False,
    no_extern_c=False,
    arch=None,
    code=None,
    cache_dir=None,
    include_dirs=[],
    target="cubin",
):
    """Compile CUDA *source* to the requested *target* and return the bytes.

    Prepares the compiler option list (arch/code selection, debugging flags,
    word-size flag, include paths, cache directory resolution) and delegates
    the actual invocation to :func:`compile_plain`.

    :param source: CUDA source text.
    :param nvcc: path or name of the compiler executable.
    :param options: compiler options; None selects ``DEFAULT_NVCC_FLAGS``.
    :param keep: keep intermediate compiler output.
    :param no_extern_c: when False, the source is wrapped in ``extern "C"``.
    :param arch: target architecture (e.g. ``"sm_75"``); autodetected from
        the current context when None.
    :param code: value for the ``-code`` option.
    :param cache_dir: compile-cache directory; None means autodetect,
        False disables caching.
    :param include_dirs: extra include directories.  NOTE: mutable default
        is safe here because the list is never mutated (a new list is built).
    :param target: one of ``"cubin"``, ``"ptx"``, ``"fatbin"``.
    """
    assert target in ["cubin", "ptx", "fatbin"]
    if not no_extern_c:
        source = 'extern "C" {\n%s\n}\n' % source

    if options is None:
        options = DEFAULT_NVCC_FLAGS

    # Copy so the caller's (or the module-default) list is never mutated.
    options = options[:]
    if arch is None:
        from pycuda.driver import Error

        try:
            from pycuda.driver import Context

            arch = "sm_%d%d" % Context.get_device().compute_capability()
        except Error:
            # No active context -- let the compiler pick its default arch.
            pass

    from pycuda.driver import CUDA_DEBUGGING

    if CUDA_DEBUGGING:
        # Debug builds: disable caching, keep intermediates, add device debug.
        cache_dir = False
        keep = True
        options.extend(["-g", "-G"])

    if "PYCUDA_CACHE_DIR" in os.environ and cache_dir is None:
        cache_dir = os.environ["PYCUDA_CACHE_DIR"]

    if "PYCUDA_DISABLE_CACHE" in os.environ:
        cache_dir = False

    if cache_dir is None:
        import platformdirs

        cache_dir = os.path.join(
            platformdirs.user_cache_dir("pycuda", "pycuda"), "compiler-cache-v1"
        )

        from os import makedirs

        makedirs(cache_dir, exist_ok=True)

    if arch is not None:
        options.extend(["-arch", arch])

    if code is not None:
        options.extend(["-code", code])

    # Match the compiler's word size to the running interpreter's.
    if "darwin" in sys.platform and sys.maxsize == 9223372036854775807:
        options.append("-m64")
    elif "win32" in sys.platform and sys.maxsize == 9223372036854775807:
        options.append("-m64")
    elif "win32" in sys.platform and sys.maxsize == 2147483647:
        options.append("-m32")

    include_dirs = include_dirs + [_find_pycuda_include_path()]

    for i in include_dirs:
        options.append("-I" + i)

    return compile_plain(source, options, keep, nvcc, cache_dir, target)
class CudaModule:
    """Common behavior shared by the module-building wrapper classes."""

    def _check_arch(self, arch):
        """Warn if *arch* targets a higher compute capability than the GPU.

        Best effort only: any failure (no driver, no context, odd arch
        string) is silently ignored so compilation is never blocked.
        """
        if arch is None:
            return

        try:
            from pycuda.driver import Context

            capability = Context.get_device().compute_capability()
            requested = tuple(map(int, tuple(arch.split("_")[1])))
            if requested > capability:
                from warnings import warn

                warn(
                    "trying to compile for a compute capability "
                    "higher than selected GPU"
                )
        except Exception:
            pass

    def _bind_module(self):
        # Re-export the underlying module's accessors on this wrapper.
        self.get_global = self.module.get_global
        self.get_texref = self.module.get_texref
        if hasattr(self.module, "get_surfref"):
            self.get_surfref = self.module.get_surfref

    def get_function(self, name):
        """Return the kernel named *name* from the underlying module."""
        return self.module.get_function(name)
class SourceModule(CudaModule):
    """
    Creates a Module from a single .cu source object linked against the
    static CUDA runtime.
    """

    def __init__(
        self,
        source,
        nvcc="nvcc",
        options=None,
        keep=False,
        no_extern_c=False,
        arch=None,
        code=None,
        cache_dir=None,
        include_dirs=[],
    ):
        # Parameters mirror compile(); see that function for their meaning.
        # Warn early if arch exceeds the current GPU's capability.
        self._check_arch(arch)

        # Compile to a cubin and load it into the current CUDA context.
        cubin = compile(
            source,
            nvcc,
            options,
            keep,
            no_extern_c,
            arch,
            code,
            cache_dir,
            include_dirs,
        )

        from pycuda.driver import module_from_buffer

        self.module = module_from_buffer(cubin)

        # Expose get_global/get_texref (and get_surfref if available).
        self._bind_module()
def _search_on_path(filenames):
"""Find file on system path."""
# http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/52224
from os.path import exists, abspath, join
from os import pathsep, environ
search_path = environ["PATH"]
paths = search_path.split(pathsep)
for path in paths:
for filename in filenames:
if exists(join(path, filename)):
return abspath(join(path, filename))
@memoize
def _find_nvcc_on_path():
    """Locate the nvcc executable on $PATH, or None (result is cached)."""
    candidates = ["nvcc", "nvcc.exe"]
    return _search_on_path(candidates)
class DynamicModule(CudaModule):
    """
    Creates a Module from multiple .cu source, library file and/or data
    objects linked against the static or dynamic CUDA runtime.
    """

    def __init__(
        self,
        nvcc="nvcc",
        link_options=None,
        keep=False,
        no_extern_c=False,
        arch=None,
        code=None,
        cache_dir=None,
        include_dirs=[],
        message_handler=None,
        log_verbose=False,
        cuda_libdir=None,
    ):
        from pycuda.driver import Context

        # Dynamic parallelism requires compute capability >= 3.5.
        compute_capability = Context.get_device().compute_capability()
        if compute_capability < (3, 5):
            raise Exception(
                "Minimum compute capability for dynamic parallelism is 3.5 (found: %u.%u)!"
                % (compute_capability[0], compute_capability[1])
            )
        else:
            from pycuda.driver import Linker

            self.linker = Linker(message_handler, link_options, log_verbose)
        self._check_arch(arch)
        # Compilation parameters, reused by every add_source() call.
        self.nvcc = nvcc
        self.keep = keep
        self.no_extern_c = no_extern_c
        self.arch = arch
        self.code = code
        self.cache_dir = cache_dir
        self.include_dirs = include_dirs
        self.cuda_libdir = cuda_libdir
        # Lazily resolved by add_stdlib() via _locate_cuda_libdir().
        self.libdir, self.libptn = None, None
        # Set by link(); None until then.
        self.module = None

    def _locate_cuda_libdir(self):
        """
        Locate the "standard" CUDA SDK library directory in the local
        file system. Supports 64-Bit Windows, Linux and Mac OS X.
        In case the caller supplied cuda_libdir in the constructor
        other than None that value is returned unchecked, else a
        best-effort attempt is made.
        Precedence:
            Windows: cuda_libdir > %CUDA_PATH%
            Linux:   cuda_libdir > $CUDA_ROOT > $LD_LIBRARY_PATH > '/usr/lib/x86_64-linux-gnu'
        Returns a pair (libdir, libptn) where libdir is None in case
        of failure or a string containing the absolute path of the
        directory, and libptn is the %-format pattern to construct
        library file names from library names on the local system.
        Raises a RuntimeError in case of failure.
        Links:
        - Post-installation Actions
          http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#post-installation-actions
        TODO:
        - Is $CUDA_ROOT/lib64 the correct path to assume for 64-Bit CUDA libraries on Linux?
        - Mac OS X (Darwin) is currently treated like Linux, is that correct?
        - Check CMake's FindCUDA module, it might contain some helpful clues in its sources
          https://cmake.org/cmake/help/v3.0/module/FindCUDA.html
          https://github.com/Kitware/CMake/blob/master/Modules/FindCUDA.cmake
        - Verify all Linux code paths somehow
        """
        from os.path import isfile, join
        from platform import system as platform_system

        system = platform_system()
        libdir, libptn = None, None
        if system == "Windows":
            if self.cuda_libdir is not None:
                libdir = self.cuda_libdir
            elif "CUDA_PATH" in os.environ and isfile(
                join(os.environ["CUDA_PATH"], "lib\\x64\\cudadevrt.lib")
            ):
                libdir = join(os.environ["CUDA_PATH"], "lib\\x64")
            libptn = "%s.lib"
        elif system in ["Linux", "Darwin"]:
            if self.cuda_libdir is not None:
                libdir = self.cuda_libdir
            elif "CUDA_ROOT" in os.environ and isfile(
                join(os.environ["CUDA_ROOT"], "lib64/libcudadevrt.a")
            ):
                libdir = join(os.environ["CUDA_ROOT"], "lib64")
            elif "LD_LIBRARY_PATH" in os.environ:
                # Scan LD_LIBRARY_PATH for the device-runtime archive.
                for ld_path in os.environ["LD_LIBRARY_PATH"].split(":"):
                    if isfile(join(ld_path, "libcudadevrt.a")):
                        libdir = ld_path
                        break

            if libdir is None and isfile("/usr/lib/x86_64-linux-gnu/libcudadevrt.a"):
                # Distribution-packaged CUDA (e.g. Debian/Ubuntu).
                libdir = "/usr/lib/x86_64-linux-gnu"

            if libdir is None:
                # Last resort: derive the lib dir from nvcc's location.
                nvcc_path = _find_nvcc_on_path()
                if nvcc_path is not None:
                    libdir = join(os.path.dirname(nvcc_path), "..", "lib64")

            libptn = "lib%s.a"
        if libdir is None:
            raise RuntimeError(
                "Unable to locate the CUDA SDK installation "
                "directory, set CUDA library path manually"
            )
        return libdir, libptn

    def add_source(self, source, nvcc_options=None, name="kernel.ptx"):
        """Compile *source* to PTX and add it to the pending link (fluent)."""
        ptx = compile(
            source,
            nvcc=self.nvcc,
            options=nvcc_options,
            keep=self.keep,
            no_extern_c=self.no_extern_c,
            arch=self.arch,
            code=self.code,
            cache_dir=self.cache_dir,
            include_dirs=self.include_dirs,
            target="ptx",
        )
        from pycuda.driver import jit_input_type

        self.linker.add_data(ptx, jit_input_type.PTX, name)
        return self

    def add_data(self, data, input_type, name="unknown"):
        """Add a raw data blob of *input_type* to the pending link (fluent)."""
        self.linker.add_data(data, input_type, name)
        return self

    def add_file(self, filename, input_type):
        """Add a file of *input_type* to the pending link (fluent)."""
        self.linker.add_file(filename, input_type)
        return self

    def add_stdlib(self, libname):
        """Add the CUDA SDK library *libname* (e.g. "cudadevrt") to the link.

        :raises OSError: if the library file cannot be found in the located
            SDK library directory.
        """
        if self.libdir is None:
            # Resolve (and cache) the SDK library directory on first use.
            self.libdir, self.libptn = self._locate_cuda_libdir()
        from os.path import isfile, join

        libpath = join(self.libdir, self.libptn % libname)
        if not isfile(libpath):
            raise OSError('CUDA SDK library file "%s" not found' % libpath)
        from pycuda.driver import jit_input_type

        self.linker.add_file(libpath, jit_input_type.LIBRARY)
        return self

    def link(self):
        """Finalize the link, bind the resulting module and return self."""
        self.module = self.linker.link_module()
        # The linker is single-use; drop it once the module exists.
        self.linker = None
        self._bind_module()
        return self
class DynamicSourceModule(DynamicModule):
    """
    Creates a Module from a single .cu source object linked against the
    dynamic CUDA runtime.
    - compiler generates PTX relocatable device code (rdc) from source that
      can be linked with other relocatable device code
    - source is linked against the CUDA device runtime library cudadevrt
    - library cudadevrt is statically linked into the generated Module
    """

    def __init__(
        self,
        source,
        nvcc="nvcc",
        options=None,
        keep=False,
        no_extern_c=False,
        arch=None,
        code=None,
        cache_dir=None,
        include_dirs=[],
        cuda_libdir=None,
    ):
        super().__init__(
            nvcc=nvcc,
            link_options=None,
            keep=keep,
            no_extern_c=no_extern_c,
            arch=arch,
            code=code,
            cache_dir=cache_dir,
            include_dirs=include_dirs,
            cuda_libdir=cuda_libdir,
        )
        if options is None:
            options = DEFAULT_NVCC_FLAGS
        # Copy before appending so the caller's list is not mutated.
        options = options[:]
        # Relocatable device code is required for dynamic parallelism.
        if "-rdc=true" not in options:
            options.append("-rdc=true")
        if "-lcudadevrt" not in options:
            options.append("-lcudadevrt")
        self.add_source(source, nvcc_options=options)
        # NOTE(review): GPUfusion disabled the explicit cudadevrt link below
        # -- presumably because the HIP toolchain has no such SDK archive;
        # confirm that kernels using dynamic parallelism still link.
        # self.add_stdlib("cudadevrt")
        self.link()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment