Commit be8053d6 authored by Chenggang Zhao's avatar Chenggang Zhao
Browse files

Fix SM80 compilation

parent 227c3589
......@@ -158,7 +158,7 @@ __device__ __forceinline__ int64_t ld_volatile_global(const uint64_t *ptr) {
// `LD_NC_FUNC` holds the PTX load mnemonic as a string literal; which mnemonic
// is chosen depends on whether `DISABLE_AGGRESSIVE_PTX_INSTRS` is defined.
#ifndef DISABLE_AGGRESSIVE_PTX_INSTRS
// Default: non-coherent global load with the `L1::no_allocate` and `L2::256B` qualifiers
#define LD_NC_FUNC "ld.global.nc.L1::no_allocate.L2::256B"
#else
// Fallback when aggressive PTX instructions are disabled: a volatile global load.
// NOTE(review): the two consecutive definitions below look like the removed and
// added sides of a diff hunk whose +/- markers were lost (the commit is titled
// "Fix SM80 compilation"); presumably only the second one, without `.L2::256B`,
// is meant to remain. Taken literally, this redefines the macro with a
// different value without an `#undef` in between. TODO confirm against the repository.
#define LD_NC_FUNC "ld.volatile.global.L2::256B"
#define LD_NC_FUNC "ld.volatile.global"
#endif
// `ld.global.nc.L1::no_allocate` will be translated into `LDG.E.NA.[width].CONSTANT` in SASS
......
......@@ -5,13 +5,15 @@ import importlib
import importlib.resources
from torch.utils.cpp_extension import BuildExtension, CUDAExtension
# Wheel specific: The wheels only include the soname of the host library (libnvshmem_host.so.X)
# Wheel specific: the wheels only include the soname of the host library `libnvshmem_host.so.X`
def get_nvshmem_host_lib_name():
    """Return the file name of the NVSHMEM host library shipped in the wheel.

    Walks every entry directly inside the ``nvidia.nvshmem`` package resources
    and searches recursively beneath each one for a file matching
    ``libnvshmem_host.so.*``. The name of the first match is returned
    (file name only, without its directory).

    Returns:
        str: the matching file name, e.g. the versioned soname of the library.

    Raises:
        ModuleNotFoundError: if no matching file is found under any entry.
    """
    package_root = importlib.resources.files('nvidia.nvshmem')
    for path in package_root.iterdir():
        # The first hit wins; the search order is whatever `iterdir`/`rglob` yield
        for file in path.rglob('libnvshmem_host.so.*'):
            return file.name
    raise ModuleNotFoundError('libnvshmem_host.so not found')
if __name__ == '__main__':
disable_nvshmem = False
nvshmem_dir = os.getenv('NVSHMEM_DIR', None)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment