# Ninja manifest for the fused_mix_prec_layer_norm_cuda extension module.
# It compiles one C++ source and one HIP source, then links both objects into
# fused_mix_prec_layer_norm_cuda.so.
#
# Ninja treats newlines as statement terminators, so every declaration below
# must stay on its own logical line. Long values are wrapped with a trailing
# `$`, which continues the line; leading whitespace on the continued line is
# ignored by ninja.
ninja_required_version = 1.3

# Host C++ compiler.
cxx = c++
# Device compiler. The variable is named `nvcc`, but it points at hipcc.
nvcc = /public/software/compiler/rocm/dtk-22.10/bin/hipcc

# Flags passed to $cxx by the `compile` rule.
cflags = -DTORCH_EXTENSION_NAME=fused_mix_prec_layer_norm_cuda $
    -DTORCH_API_INCLUDE_EXTENSION_H $
    -DPYBIND11_COMPILER_TYPE=\"_gcc\" $
    -DPYBIND11_STDLIB=\"_libstdcpp\" $
    -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/lib/python3.7/site-packages/torch/include $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/lib/python3.7/site-packages/torch/include/torch/csrc/api/include $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/lib/python3.7/site-packages/torch/include/TH $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/lib/python3.7/site-packages/torch/include/THC $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/lib/python3.7/site-packages/torch/include/THH $
    -isystem /public/software/compiler/rocm/dtk-22.10/include $
    -isystem /public/software/compiler/rocm/dtk-22.10/miopen/include $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/include/python3.7m $
    -D_GLIBCXX_USE_CXX11_ABI=0 $
    -fPIC $
    -std=c++14 $
    -O3
# Appended after `-o $out` by the `compile` rule; empty here.
post_cflags =

# Flags passed to $nvcc (hipcc) by the `cuda_compile` rule.
# NOTE(review): -fPIC and -O3 each appear twice below. The repetition is kept
# exactly as found so the resulting command line is unchanged.
cuda_cflags = -DWITH_HIP $
    -DTORCH_EXTENSION_NAME=fused_mix_prec_layer_norm_cuda $
    -DTORCH_API_INCLUDE_EXTENSION_H $
    -DPYBIND11_COMPILER_TYPE=\"_gcc\" $
    -DPYBIND11_STDLIB=\"_libstdcpp\" $
    -DPYBIND11_BUILD_ABI=\"_cxxabi1011\" $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/lib/python3.7/site-packages/torch/include $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/lib/python3.7/site-packages/torch/include/torch/csrc/api/include $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/lib/python3.7/site-packages/torch/include/TH $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/lib/python3.7/site-packages/torch/include/THC $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/lib/python3.7/site-packages/torch/include/THH $
    -isystem /public/software/compiler/rocm/dtk-22.10/include $
    -isystem /public/software/compiler/rocm/dtk-22.10/miopen/include $
    -isystem /public/home/zhuwenwen/miniconda3/envs/megatron-lm/include/python3.7m $
    -D_GLIBCXX_USE_CXX11_ABI=0 $
    -fPIC $
    -std=c++14 $
    -O3 $
    -fPIC $
    -D__HIP_PLATFORM_HCC__=1 $
    -DCUDA_HAS_FP16=1 $
    -D__HIP_NO_HALF_OPERATORS__=1 $
    -D__HIP_NO_HALF_CONVERSIONS__=1 $
    -O3 $
    --amdgpu-target=gfx906 $
    -U__CUDA_NO_HALF_OPERATORS__ $
    -U__CUDA_NO_HALF_CONVERSIONS__ $
    -fno-gpu-rdc
# Appended after `-o $out` by the `cuda_compile` rule; empty here.
cuda_post_cflags =

# Flags passed to $cxx by the `link` rule.
ldflags = -shared $
    -L/public/home/zhuwenwen/miniconda3/envs/megatron-lm/lib/python3.7/site-packages/torch/lib $
    -lc10 $
    -lc10_hip $
    -ltorch_cpu $
    -ltorch_hip $
    -ltorch $
    -ltorch_python $
    -L/public/software/compiler/rocm/dtk-22.10/lib $
    -lamdhip64

# Compile a host C++ source. -MMD/-MF write a dependency file to $out.d, which
# ninja reads through the depfile/deps bindings.
rule compile
  command = $cxx -MMD -MF $out.d $cflags -c $in -o $out $post_cflags
  depfile = $out.d
  deps = gcc

# Compile a HIP source with $nvcc (hipcc).
# NOTE(review): this rule declares no depfile, so ninja only tracks the
# explicit $in file for objects built with it.
rule cuda_compile
  command = $nvcc $cuda_cflags -c $in -o $out $cuda_post_cflags

# Link the object files into the output named by the build statement.
rule link
  command = $cxx $in $ldflags -o $out

build layer_norm_cuda.o: compile /public/home/zhuwenwen/Megatron-LM-3.0.2/megatron/fused_kernels/layer_norm_cuda.cpp
build layer_norm_hip_kernel.cuda.o: cuda_compile /public/home/zhuwenwen/Megatron-LM-3.0.2/megatron/fused_kernels/layer_norm_hip_kernel.hip

build fused_mix_prec_layer_norm_cuda.so: link layer_norm_cuda.o layer_norm_hip_kernel.cuda.o

# Target built when ninja is invoked without arguments.
default fused_mix_prec_layer_norm_cuda.so