Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
OpenDAS
DeepEP
Commits
be8053d6
Commit
be8053d6
authored
Jul 31, 2025
by
Chenggang Zhao
Browse files
Fix SM80 compilation
parent
227c3589
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
4 additions
and
2 deletions
+4
-2
csrc/kernels/utils.cuh
csrc/kernels/utils.cuh
+1
-1
setup.py
setup.py
+3
-1
No files found.
csrc/kernels/utils.cuh
View file @
be8053d6
...
...
@@ -158,7 +158,7 @@ __device__ __forceinline__ int64_t ld_volatile_global(const uint64_t *ptr) {
#ifndef DISABLE_AGGRESSIVE_PTX_INSTRS
#define LD_NC_FUNC "ld.global.nc.L1::no_allocate.L2::256B"
#else
#define LD_NC_FUNC "ld.volatile.global
.L2::256B
"
#define LD_NC_FUNC "ld.volatile.global"
#endif
// `ld.global.nc.L1::no_allocate` will be translated into `LDG.E.NA.[width].CONSTANT` in SASS
...
...
setup.py
View file @
be8053d6
...
...
@@ -5,13 +5,15 @@ import importlib
import
importlib.resources
from
torch.utils.cpp_extension
import
BuildExtension
,
CUDAExtension
# Wheel specific: The wheels only include the soname of the host library (libnvshmem_host.so.X)
# Wheel specific: the wheels only include the soname of the host library `libnvshmem_host.so.X`
def
get_nvshmem_host_lib_name
():
for
path
in
importlib
.
resources
.
files
(
'nvidia.nvshmem'
).
iterdir
():
for
file
in
path
.
rglob
(
'libnvshmem_host.so.*'
):
return
file
.
name
raise
ModuleNotFoundError
(
'libnvshmem_host.so not found'
)
if
__name__
==
'__main__'
:
disable_nvshmem
=
False
nvshmem_dir
=
os
.
getenv
(
'NVSHMEM_DIR'
,
None
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment