"git@developer.sourcefind.cn:renzhc/diffusers_dcu.git" did not exist on "f354dd9e2ffbf79707b82fe0b73f60a5ced0c4ac"
Unverified commit 8a184b6b, authored by Stas Bekman, committed by GitHub
Browse files

[build] fix compute capability arch flags, add PTX, handle PTX (#591)



* fix arch flags, add PTX

* bug fix
Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
parent 0518252d
...@@ -221,7 +221,7 @@ class CUDAOpBuilder(OpBuilder): ...@@ -221,7 +221,7 @@ class CUDAOpBuilder(OpBuilder):
1. `TORCH_CUDA_ARCH_LIST` takes priority over `cross_compile_archs`. 1. `TORCH_CUDA_ARCH_LIST` takes priority over `cross_compile_archs`.
2. If neither is set default compute capabilities will be used 2. If neither is set default compute capabilities will be used
3. Under `jit_mode` compute capabilities of all visible cards will be used. 3. Under `jit_mode` compute capabilities of all visible cards will be used plus PTX
Format: Format:
...@@ -243,6 +243,7 @@ class CUDAOpBuilder(OpBuilder): ...@@ -243,6 +243,7 @@ class CUDAOpBuilder(OpBuilder):
if cc not in ccs: if cc not in ccs:
ccs.append(cc) ccs.append(cc)
ccs = sorted(ccs) ccs = sorted(ccs)
ccs[-1] += '+PTX'
else: else:
# Cross-compile mode, compile for various architectures # Cross-compile mode, compile for various architectures
# env override takes priority # env override takes priority
...@@ -260,8 +261,10 @@ class CUDAOpBuilder(OpBuilder): ...@@ -260,8 +261,10 @@ class CUDAOpBuilder(OpBuilder):
args = [] args = []
for cc in ccs: for cc in ccs:
cc = cc.replace('.', '') num = cc[0] + cc[2]
args.append(f'-gencode=arch=compute_{cc},code=compute_{cc}') args.append(f'-gencode=arch=compute_{num},code=sm_{num}')
if cc.endswith('+PTX'):
args.append(f'-gencode=arch=compute_{num},code=compute_{num}')
return args return args
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment