Commit 5cbe635d authored by zhanghj2
Browse files

支持aicc编译

parent 902e3032
...@@ -31,7 +31,7 @@ def get_features_args(): ...@@ -31,7 +31,7 @@ def get_features_args():
def get_arch_flags(): def get_arch_flags():
arch_flags = [] arch_flags = []
arch_flags.append("--offload-arch=gfx938;gfx936;gfx928") arch_flags.append("--offload-arch=gfx938,gfx936,gfx928")
return arch_flags return arch_flags
# def get_nvcc_thread_args(): # def get_nvcc_thread_args():
...@@ -41,12 +41,28 @@ def get_arch_flags(): ...@@ -41,12 +41,28 @@ def get_arch_flags():
# subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"]) # subprocess.run(["git", "submodule", "update", "--init", "csrc/cutlass"])
this_dir = os.path.dirname(os.path.abspath(__file__)) this_dir = os.path.dirname(os.path.abspath(__file__))
os.environ['PYTORCH_NVCC'] = 'aicc'
if False: if False:
cxx_args = ["/O2", "/std:c++20", "/DNDEBUG", "/W0"] cxx_args = ["/O2", "/std:c++20", "/DNDEBUG", "/W0"]
else: else:
cxx_args = ["-O3", "-std=c++20", "-DNDEBUG", "-Wno-deprecated-declarations", "-DDCU_ASM", "-Wno-return-type", ] cxx_args = ["-O3", "-std=c++20", "-DNDEBUG", "-Wno-deprecated-declarations", "-DDCU_ASM", "-Wno-return-type", ]
aicc_flags = [
"-mcode-object-version=5",
"-mllvm=-support-768-vgprs=true",
"-mllvm=-disable-machine-sink",
"-mllvm=-disable-code-sink",
"-mllvm=-amdgpu-enable-rewrite-partial-reg-uses=false",
"-mllvm=-allow-gvn-convergent-call=true",
"-mllvm=-disallow-uniform-vmed3-combine=true",
"-mllvm=-hcu-pre-emit-load-store-opt=false",
"-mllvm=-amdgpu-early-inline-all=true",
"-mllvm=-amdgpu-function-calls=false",
"-fno-finite-math-only",
"--gpu-max-threads-per-block=256"
]
ext_modules = [] ext_modules = []
ext_modules.append( ext_modules.append(
...@@ -103,11 +119,11 @@ ext_modules.append( ...@@ -103,11 +119,11 @@ ext_modules.append(
"-ftemplate-backtrace-limit=0", "-ftemplate-backtrace-limit=0",
"-Rpass-analysis=kernel-resource-usage", "-Rpass-analysis=kernel-resource-usage",
"-DDCU_ASM", "-DDCU_ASM",
"--save-temps", # "--save-temps",
"-w", "-w",
"-mllvm -enable-num-vgprs-512=true", # "-mllvm -enable-num-vgprs-512=true",
"-mllvm -allow-cse-cross-bb-convergent-call=true", # "-mllvm -allow-cse-cross-bb-convergent-call=true",
"-mllvm -full-vectorize-slp=true", # "-mllvm -full-vectorize-slp=true",
] + get_features_args() + get_arch_flags() ] + get_features_args() + get_arch_flags()
}, },
include_dirs=[ include_dirs=[
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment