# SPDX-License-Identifier: MIT
"""Build script for the ``aiter`` package.

Running this script:

1. records whether this is a ``develop``/editable or a regular install in
   ``./aiter/install_mode``;
2. optionally (``PREBUILD_KERNELS=1``) pre-builds the JIT modules returned by
   ``core.get_args_of_build`` using a small thread pool;
3. stages ``3rdparty``, ``hsa``, ``csrc`` and ``aiter/configs`` into a
   temporary ``aiter_meta`` package directory, calls ``setuptools.setup`` and
   removes the staging directory again.

Environment variables read here: ``CK_DIR``, ``BUILD_TARGET``,
``PREBUILD_KERNELS``, ``AITER_PREBUILD_LOG_PROGRESS``,
``AITER_PREBUILD_VERBOSE``, ``AITER_REBUILD`` and ``MAX_JOBS``.
"""

import os
import shutil
import sys
import time
from concurrent.futures import ThreadPoolExecutor

from setuptools import Distribution, setup

# !!!!!!!!!!!!!!!! never import aiter
# from aiter.jit import core
# Instead, put the package directory itself on sys.path so that ``jit`` can be
# imported as a top-level module without importing the ``aiter`` package.
this_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, f"{this_dir}/aiter/")

from jit import core  # noqa: E402
from jit.utils.cpp_extension import IS_HIP_EXTENSION, BuildExtension  # noqa: E402

ck_dir = os.environ.get("CK_DIR", f"{this_dir}/3rdparty/composable_kernel")

PACKAGE_NAME = "aiter"
BUILD_TARGET = os.environ.get("BUILD_TARGET", "auto")

if BUILD_TARGET == "auto":
    IS_ROCM = bool(IS_HIP_EXTENSION)
elif BUILD_TARGET == "cuda":
    IS_ROCM = False
elif BUILD_TARGET == "rocm":
    IS_ROCM = True
else:
    # Previously an unknown value left IS_ROCM undefined and the script died
    # later with an unrelated-looking NameError.
    raise ValueError(
        f"Unsupported BUILD_TARGET={BUILD_TARGET!r}; "
        'expected "auto", "cuda" or "rocm"'
    )

FORCE_CXX11_ABI = False

PREBUILD_KERNELS = int(os.environ.get("PREBUILD_KERNELS", 0))
PREBUILD_LOG_PROGRESS = int(os.environ.get("AITER_PREBUILD_LOG_PROGRESS", 1))
PREBUILD_VERBOSE = int(os.environ.get("AITER_PREBUILD_VERBOSE", 0))

# Source directories that are copied into the temporary "aiter_meta" package.
_META_SUBDIRS = ("3rdparty", "hsa", "csrc", "aiter/configs")


def getMaxJobs():
    """Return a job count bounded by both CPU cores and available memory.

    The core-based bound is 80% of ``os.cpu_count()``; the memory-based bound
    assumes 0.5 GB of available memory per job. The lower of the two is used
    to minimize OOM and swap usage during compilation.

    Returns:
        int: The number of parallel jobs, always at least 1.
    """
    # os.cpu_count() returns None when the count cannot be determined.
    cpu_count = os.cpu_count() or 1
    max_num_jobs_cores = max(1, cpu_count * 0.8)

    import psutil

    free_memory_gb = psutil.virtual_memory().available / (1024**3)
    max_num_jobs_memory = int(free_memory_gb / 0.5)  # assuming 0.5 GB per job

    return int(max(1, min(max_num_jobs_cores, max_num_jobs_memory)))


def is_develop_mode():
    """Return True if any command-line argument requests a develop install.

    An argument matches when it is exactly ``"develop"`` or contains
    ``"editable"`` (the latter covers ``pip install -e``).
    """
    return any(arg == "develop" or "editable" in arg for arg in sys.argv)


def _positive_int_env(name):
    """Return environment variable *name* as a positive int, or None.

    None is returned when the variable is unset, is not parseable by
    ``int()``, or is not strictly positive.
    """
    raw = os.environ.get(name)
    if raw is None:
        return None
    try:
        value = int(raw)
    except ValueError:
        return None
    return value if value > 0 else None


def _stage_aiter_meta(subdirs):
    """Copy each directory in *subdirs* to ``aiter_meta/<subdir>``.

    Paths are relative to the current working directory.
    ``shutil.copytree`` raises if a destination already exists.
    """
    for subdir in subdirs:
        shutil.copytree(subdir, f"aiter_meta/{subdir}")


with open("./aiter/install_mode", "w") as f:
    f.write("develop" if is_develop_mode() else "install")

if IS_ROCM:
    assert os.path.exists(ck_dir), (
        'CK is needed by aiter, please make sure clone by "git clone --recursive '
        'https://gerrit.roc.cn:443/ROCm/composable_kernel" or '
        '"git submodule sync ; git submodule update --init --recursive"'
    )

    if PREBUILD_KERNELS == 1:
        exclude_ops = [
            "libmha_fwd",
            "libmha_bwd",
            "module_fmha_v3_fwd",
            "module_mha_fwd",
            "module_mha_varlen_fwd",
            "module_mha_batch_prefill",
            "module_fmha_v3_bwd",
            "module_fmha_v3_varlen_bwd",
            "module_fmha_v3_varlen_fwd",
            "module_mha_bwd",
            "module_mha_varlen_bwd",
        ]

        all_opts_args_build, prebuild_link_param = core.get_args_of_build(
            "all", exclude=exclude_ops
        )

        # The user JIT directory is intentionally not wiped (no "rm -rf" of
        # its build/ directory or *.so files): build_one_module below skips
        # modules whose .so already exists unless AITER_REBUILD is set.
        prebuild_dir = f"{core.get_user_jit_dir()}/build/aiter_/build"
        # exist_ok so that "srcs" is also created when prebuild_dir itself
        # already exists.
        os.makedirs(prebuild_dir + "/srcs", exist_ok=True)

        if not os.path.exists("aiter_meta"):
            _stage_aiter_meta(_META_SUBDIRS + ("gradlib",))

        def build_one_module(one_opt_args):
            """Build one JIT module described by *one_opt_args*.

            *one_opt_args* is a mapping providing ``md_name``, ``srcs``,
            ``flags_extra_cc``, ``flags_extra_hip``, ``blob_gen_cmd``,
            ``extra_include``, ``extra_ldflags``, ``verbose`` and ``hipify``,
            plus an optional ``skip_if`` flag. The module is skipped when
            ``skip_if`` is truthy, or when ``<md_name>.so`` already exists in
            the user JIT directory and ``AITER_REBUILD`` is not set.
            """
            md_name = one_opt_args["md_name"]

            if one_opt_args.get("skip_if", False):
                if PREBUILD_LOG_PROGRESS:
                    print(
                        f"[aiter-prebuild] SKIP module={md_name} (skip_if condition met)",
                        flush=True,
                    )
                return

            # Incremental build: skip if the .so already exists
            so_path = os.path.join(core.get_user_jit_dir(), f"{md_name}.so")
            if os.path.exists(so_path) and not os.environ.get("AITER_REBUILD"):
                if PREBUILD_LOG_PROGRESS:
                    print(
                        f"[aiter-prebuild] SKIP module={md_name} (.so already exists)",
                        flush=True,
                    )
                return

            start_ts = time.perf_counter()
            if PREBUILD_LOG_PROGRESS:
                print(
                    f"[aiter-prebuild] START module={md_name}",
                    flush=True,
                )

            # Work on a copy so the verbose override does not mutate the
            # caller's dictionary.
            one_opt_args = dict(one_opt_args)
            if PREBUILD_VERBOSE:
                one_opt_args["verbose"] = True

            core.build_module(
                md_name=md_name,
                srcs=one_opt_args["srcs"],
                flags_extra_cc=one_opt_args["flags_extra_cc"],
                flags_extra_hip=one_opt_args["flags_extra_hip"],
                blob_gen_cmd=one_opt_args["blob_gen_cmd"],
                extra_include=one_opt_args["extra_include"],
                extra_ldflags=one_opt_args["extra_ldflags"],
                verbose=one_opt_args["verbose"],
                is_python_module=True,
                is_standalone=False,
                torch_exclude=False,
                hipify=one_opt_args["hipify"],
            )

            if PREBUILD_LOG_PROGRESS:
                print(
                    f"[aiter-prebuild] DONE module={md_name} cost={time.perf_counter()-start_ts:.1f}s",
                    flush=True,
                )

        # step 1, build *.cu -> module*.so
        # Use at most 5 threads; respect MAX_JOBS when it is a valid positive
        # integer, otherwise fall back to the auto-calculated limit.
        max_jobs = _positive_int_env("MAX_JOBS")
        if max_jobs is None:
            max_jobs = getMaxJobs()
        prebuild_thread_num = min(5, max_jobs)
        os.environ["PREBUILD_THREAD_NUM"] = str(prebuild_thread_num)

        if PREBUILD_LOG_PROGRESS:
            print(
                f"[aiter-prebuild] thread_num={prebuild_thread_num}, PREBUILD_VERBOSE={PREBUILD_VERBOSE}",
                flush=True,
            )

        with ThreadPoolExecutor(max_workers=prebuild_thread_num) as executor:
            # list() drains the iterator so an exception raised in any worker
            # is re-raised here.
            list(executor.map(build_one_module, all_opts_args_build))
else:
    raise NotImplementedError("Only ROCM is supported")

# Start from a clean staging directory, then copy "3rdparty", "hsa", "csrc"
# and "aiter/configs" into "aiter_meta" so they are packaged with the build.
if os.path.isdir("aiter_meta"):
    shutil.rmtree("aiter_meta")
_stage_aiter_meta(_META_SUBDIRS)


class NinjaBuildExtension(BuildExtension):
    """``BuildExtension`` that guarantees a usable ``MAX_JOBS`` value."""

    def __init__(self, *args, **kwargs) -> None:
        """Set ``MAX_JOBS`` when unset or invalid, then defer to the parent.

        A ``MAX_JOBS`` value that is already a positive integer is left
        untouched; otherwise it is replaced by ``getMaxJobs()``.
        """
        if _positive_int_env("MAX_JOBS") is None:
            os.environ["MAX_JOBS"] = str(getMaxJobs())

        super().__init__(*args, **kwargs)


setup_requires = [
    "packaging",
    "psutil",
    "ninja",
    "setuptools_scm",
]
if PREBUILD_KERNELS == 1:
    setup_requires.append("pandas")


class ForcePlatlibDistribution(Distribution):
    """Distribution that always reports having extension modules.

    NOTE(review): presumably this makes setuptools treat the package as
    platform-specific (platlib) even though ``ext_modules`` is not passed to
    ``setup`` -- confirm against the packaging requirements.
    """

    def has_ext_modules(self):
        """Return True unconditionally."""
        return True


try:
    setup(
        name=PACKAGE_NAME,
        use_scm_version=True,
        packages=["aiter_meta", "aiter"],
        include_package_data=True,
        package_data={
            "": ["*"],
        },
        # NOTE(review): the classifier below says BSD while the SPDX header of
        # this file says MIT -- confirm which license is intended.
        classifiers=[
            "Programming Language :: Python :: 3",
            "License :: OSI Approved :: BSD License",
            "Operating System :: Unix",
        ],
        # ext_modules=ext_modules,
        cmdclass={"build_ext": NinjaBuildExtension},
        python_requires=">=3.8",
        install_requires=[
            "pybind11>=3.0.1",
            "ninja",
            "pandas",
            "einops",
            "psutil",
        ],
        setup_requires=setup_requires,
        distclass=ForcePlatlibDistribution,
    )
finally:
    # Remove the staging directory even when setup() fails.
    if os.path.isdir("aiter_meta"):
        shutil.rmtree("aiter_meta")