Commit f0ef3442 authored by yuguo960516yuguo's avatar yuguo960516yuguo
Browse files

2.3.2-dtk-22.10.1

parent ad08b8ce
Pipeline #227 failed with stages
in 0 seconds
# Locate the Cambricon Neuware SDK and expose its shared libraries through the
# `neuware_lib` target. Nothing is configured unless WITH_MLU is enabled.
if(NOT WITH_MLU)
  return()
endif()

# Honour the NEUWARE_HOME environment variable when it is set and non-empty,
# otherwise fall back to the default install prefix.
# if() never auto-dereferences environment variables, so the value has to be
# read explicitly through $ENV{...}; a bare `ENV{NEUWARE_HOME}` is treated as
# an (undefined) variable name and the default path would always be chosen.
if("$ENV{NEUWARE_HOME}" STREQUAL "")
  set(NEUWARE_HOME "/usr/local/neuware")
else()
  set(NEUWARE_HOME "$ENV{NEUWARE_HOME}")
endif()
message(STATUS "NEUWARE_HOME: ${NEUWARE_HOME}")

# Header and library locations inside the SDK.
set(NEUWARE_INCLUDE_DIR ${NEUWARE_HOME}/include)
set(NEUWARE_LIB_DIR ${NEUWARE_HOME}/lib64)

include_directories(${NEUWARE_INCLUDE_DIR})

# Shared libraries that are always linked.
set(CNNL_LIB ${NEUWARE_LIB_DIR}/libcnnl.so)
set(MLUOP_LIB ${NEUWARE_LIB_DIR}/libmluops.so)
set(CNRT_LIB ${NEUWARE_LIB_DIR}/libcnrt.so)
set(CNDRV_LIB ${NEUWARE_LIB_DIR}/libcndrv.so)
set(CNPAPI_LIB ${NEUWARE_LIB_DIR}/libcnpapi.so)

generate_dummy_static_lib(LIB_NAME "neuware_lib" GENERATOR "neuware.cmake")
set(NEUWARE_LIB_DEPS ${CNNL_LIB} ${MLUOP_LIB} ${CNRT_LIB} ${CNDRV_LIB}
                     ${CNPAPI_LIB})

# CNCL is optional: it adds a compile definition and one more library.
if(WITH_CNCL)
  message(STATUS "Compile with CNCL!")
  add_definitions(-DPADDLE_WITH_CNCL)
  set(CNCL_LIB ${NEUWARE_LIB_DIR}/libcncl.so)
  list(APPEND NEUWARE_LIB_DEPS ${CNCL_LIB})
endif()

target_link_libraries(neuware_lib ${NEUWARE_LIB_DEPS})
# CMake file `unity_build` is used to handle Unity Build compilation.
include(unity_build)
# Start with no collected `.part.cu` files. op_library() prepends
# `<TARGET>.part.cu` to this variable in its caller's scope whenever such a
# file exists next to the operator sources.
set(PART_CUDA_KERNEL_FILES)
# find_register(FILENAME PATTERN OUTPUT)
#
# Finds the op_name of the first `PATTERN(op_name, ...)` registration in
# FILENAME, e.g. REGISTER_OPERATOR(op_name, ...) or
# REGISTER_OP_CPU_KERNEL(op_name, ...).
#
#   FILENAME - file to scan (read with file(READ)).
#   PATTERN  - registration macro name; it is spliced into a regular
#              expression as-is.
#   OUTPUT   - name of the variable, set in the caller's scope, that receives
#              the op name. It is set to an empty string when nothing matches.
#
# Only names made of [a-z0-9_] are recognised.
function(find_register FILENAME PATTERN OUTPUT)
  file(READ "${FILENAME}" CONTENT)
  string(REGEX MATCH "${PATTERN}\\([ \t\r\n]*[a-z0-9_]*," register "${CONTENT}")
  if(NOT register STREQUAL "")
    # Strip the macro name, the opening parenthesis and the trailing comma.
    string(REPLACE "${PATTERN}(" "" register "${register}")
    string(REPLACE "," "" register "${register}")
    # [ \t\r\n]+ is used for blank characters.
    # Here we use '+' instead of '*' since it is a REPLACE operation.
    string(REGEX REPLACE "[ \t\r\n]+" "" register "${register}")
  endif()
  # Quoted so that "no match" yields a defined, empty variable rather than
  # unsetting the caller's variable.
  set(${OUTPUT}
      "${register}"
      PARENT_SCOPE)
endfunction()
# op_library(TARGET [UNITY] [SRCS <file>...] [DEPS <target>...])
#
# op_library is a function to create an op library. The interface is the same
# as cc_library, but it splits GPU/CPU (and other backend) code and links some
# common libraries for ops.
#
#   TARGET - operator target name, conventionally `<name>_op`.
#   UNITY  - opt this operator into Unity Build (effective only together with
#            the global WITH_UNITY_BUILD switch).
#   SRCS   - explicit source list. When omitted, sources are discovered in
#            CMAKE_CURRENT_SOURCE_DIR from TARGET using the per-backend file
#            naming conventions handled below.
#   DEPS   - extra link dependencies for the created library.
#
# Besides creating the library, the function records the target in the
# OP_LIBRARY cache variable and appends the matching USE_OP* lines to
# ${pybind_file}.
function(op_library TARGET)
  # Per-backend source buckets.
  set(cc_srcs)
  set(cu_srcs)
  set(hip_srcs)
  set(cu_cc_srcs)
  set(hip_cc_srcs)
  set(xpu_cc_srcs)
  set(xpu_kp_cc_srcs)
  set(npu_cc_srcs)
  set(mlu_cc_srcs)
  set(cudnn_cu_cc_srcs)
  set(miopen_cu_cc_srcs)
  set(cudnn_cu_srcs)
  set(miopen_cu_srcs)
  set(CUDNN_FILE)
  set(MIOPEN_FILE)
  set(mkldnn_cc_srcs)
  set(MKLDNN_FILE)
  set(op_common_deps operator op_registry math_function layer
                     common_infer_shape_functions)
  if(WITH_ASCEND_CL)
    set(op_common_deps ${op_common_deps} npu_op_runner)
  endif()
  if(WITH_MLU)
    set(op_common_deps ${op_common_deps} mlu_baseop)
  endif()

  # Option `UNITY` is used to specify that operator `TARGET` will compile with Unity Build.
  set(options UNITY)
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  set(pybind_flag 0)
  cmake_parse_arguments(op_library "${options}" "${oneValueArgs}"
                        "${multiValueArgs}" ${ARGN})

  list(LENGTH op_library_SRCS op_library_SRCS_len)
  if(${op_library_SRCS_len} EQUAL 0)
    # No explicit SRCS: probe the source directory for conventionally named
    # files derived from TARGET.
    if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cc)
      list(APPEND cc_srcs ${TARGET}.cc)
    endif()
    if(WITH_GPU)
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
        list(APPEND cu_cc_srcs ${TARGET}.cu.cc)
      endif()
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
        list(APPEND cu_srcs ${TARGET}.cu)
      endif()
      # rename in KP: .kps -> .cu
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps)
        file(COPY ${TARGET}.kps DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
        file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.kps
             ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
        list(APPEND cu_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
      endif()
      if(WITH_NV_JETSON)
        list(REMOVE_ITEM cu_srcs "decode_jpeg_op.cu")
      endif()
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
        set(PART_CUDA_KERNEL_FILES
            ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
            ${PART_CUDA_KERNEL_FILES}
            PARENT_SCOPE)
        list(APPEND cu_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
      endif()
      string(REPLACE "_op" "_cudnn_op" CUDNN_FILE "${TARGET}")
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu.cc)
        list(APPEND cudnn_cu_cc_srcs ${CUDNN_FILE}.cu.cc)
      endif()
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${CUDNN_FILE}.cu)
        list(APPEND cudnn_cu_srcs ${CUDNN_FILE}.cu)
      endif()
    endif()
    if(WITH_ROCM)
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu.cc)
        list(APPEND hip_cc_srcs ${TARGET}.cu.cc)
      endif()
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.cu)
        list(APPEND hip_srcs ${TARGET}.cu)
      endif()
      # rename in KP: .kps -> .cu
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps)
        file(COPY ${TARGET}.kps DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
        file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.kps
             ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
        list(APPEND hip_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
      endif()
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
        set(PART_CUDA_KERNEL_FILES
            ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
            ${PART_CUDA_KERNEL_FILES}
            PARENT_SCOPE)
        list(APPEND hip_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
      endif()
      # The MIOpen variants reuse the `_cudnn_op` file naming.
      string(REPLACE "_op" "_cudnn_op" MIOPEN_FILE "${TARGET}")
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu.cc)
        list(APPEND miopen_cu_cc_srcs ${MIOPEN_FILE}.cu.cc)
      endif()
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MIOPEN_FILE}.cu)
        list(APPEND miopen_cu_srcs ${MIOPEN_FILE}.cu)
      endif()
    endif()
    if(WITH_MKLDNN)
      string(REPLACE "_op" "_mkldnn_op" MKLDNN_FILE "${TARGET}")
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/mkldnn/${MKLDNN_FILE}.cc)
        list(APPEND mkldnn_cc_srcs mkldnn/${MKLDNN_FILE}.cc)
      endif()
    endif()
    if(WITH_XPU)
      string(REPLACE "_op" "_op_xpu" XPU_FILE "${TARGET}")
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${XPU_FILE}.cc)
        list(APPEND xpu_cc_srcs ${XPU_FILE}.cc)
      endif()
    endif()
    if(WITH_XPU_KP)
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.xpu)
        list(APPEND xpu_kp_cc_srcs ${TARGET}.xpu)
      endif()
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.kps)
        list(APPEND xpu_kp_cc_srcs ${TARGET}.kps)
      endif()
    endif()
    if(WITH_ASCEND_CL)
      string(REPLACE "_op" "_op_npu" NPU_FILE "${TARGET}")
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${NPU_FILE}.cc)
        list(APPEND npu_cc_srcs ${NPU_FILE}.cc)
      endif()
    endif()
    if(WITH_MLU)
      string(REPLACE "_op" "_op_mlu" MLU_FILE "${TARGET}")
      if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MLU_FILE}.cc)
        list(APPEND mlu_cc_srcs ${MLU_FILE}.cc)
      endif()
    endif()
  else()
    # Explicit SRCS: classify every file by its suffix. The order of the
    # branches matters (the more specific `_cudnn_op` patterns and the `.cu`
    # patterns are tested before the generic `.cc` fallback).
    foreach(src ${op_library_SRCS})
      if(WITH_ROCM AND "${src}" MATCHES ".*_cudnn_op.cu$")
        list(APPEND miopen_cu_srcs ${src})
      elseif(WITH_ROCM AND "${src}" MATCHES ".*\\.cu$")
        list(APPEND hip_srcs ${src})
      elseif(WITH_ROCM AND "${src}" MATCHES ".*_cudnn_op.cu.cc$")
        list(APPEND miopen_cu_cc_srcs ${src})
      elseif(WITH_ROCM AND "${src}" MATCHES ".*\\.cu.cc$")
        list(APPEND hip_cc_srcs ${src})
      elseif(WITH_GPU AND "${src}" MATCHES ".*_cudnn_op.cu$")
        list(APPEND cudnn_cu_srcs ${src})
      elseif(WITH_GPU AND "${src}" MATCHES ".*\\.cu$")
        list(APPEND cu_srcs ${src})
      elseif(WITH_GPU AND "${src}" MATCHES ".*_cudnn_op.cu.cc$")
        list(APPEND cudnn_cu_cc_srcs ${src})
      elseif(WITH_GPU AND "${src}" MATCHES ".*\\.cu.cc$")
        list(APPEND cu_cc_srcs ${src})
      elseif(WITH_MKLDNN AND "${src}" MATCHES ".*_mkldnn_op.cc$")
        list(APPEND mkldnn_cc_srcs ${src})
      elseif(WITH_XPU AND "${src}" MATCHES ".*_op_xpu.cc$")
        list(APPEND xpu_cc_srcs ${src})
      elseif(WITH_XPU_KP AND "${src}" MATCHES ".*\\.xpu$")
        list(APPEND xpu_kp_cc_srcs ${src})
      elseif(WITH_XPU_KP AND "${src}" MATCHES ".*\\.kps$")
        list(APPEND xpu_kp_cc_srcs ${src})
      elseif(WITH_ASCEND_CL AND "${src}" MATCHES ".*_op_npu.cc$")
        list(APPEND npu_cc_srcs ${src})
      elseif(WITH_MLU AND "${src}" MATCHES ".*_op_mlu.cc$")
        list(APPEND mlu_cc_srcs ${src})
      elseif("${src}" MATCHES ".*\\.cc$")
        list(APPEND cc_srcs ${src})
      else()
        message(
          FATAL_ERROR
            "${TARGET} Source file ${src} should only be .cc or .cu or .xpu")
      endif()
    endforeach()
  endif()

  list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
  list(LENGTH xpu_kp_cc_srcs xpu_kp_cc_srcs_len)
  list(LENGTH cc_srcs cc_srcs_len)
  if(${cc_srcs_len} EQUAL 0)
    message(
      FATAL_ERROR
        "The op library ${TARGET} should contains at least one .cc file")
  endif()
  if(WIN32)
    # remove windows unsupported op, because windows has no nccl, no warpctc such ops.
    foreach(windows_unsupport_op "nccl_op" "gen_nccl_id_op")
      if("${TARGET}" STREQUAL "${windows_unsupport_op}")
        return()
      endif()
    endforeach()
  endif()

  # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
  if(WITH_UNITY_BUILD AND op_library_UNITY)
    # Generate the unity target name by the directory where source files located.
    string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" UNITY_TARGET
                   ${CMAKE_CURRENT_SOURCE_DIR})
    string(REPLACE "/" "_" UNITY_TARGET ${UNITY_TARGET})
    set(UNITY_TARGET "paddle_${UNITY_TARGET}_unity")
    if(NOT ${UNITY_TARGET} IN_LIST OP_LIBRARY)
      set(OP_LIBRARY
          ${UNITY_TARGET} ${OP_LIBRARY}
          CACHE INTERNAL "op libs")
    endif()
  else()
    set(OP_LIBRARY
        ${TARGET} ${OP_LIBRARY}
        CACHE INTERNAL "op libs")
  endif()

  # Operators with extra dependencies are reported to the caller via DEPS_OPS.
  list(LENGTH op_library_DEPS op_library_DEPS_len)
  if(${op_library_DEPS_len} GREATER 0)
    set(DEPS_OPS
        ${TARGET} ${DEPS_OPS}
        PARENT_SCOPE)
  endif()

  if(WITH_GPU)
    # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
    if(WITH_UNITY_BUILD AND op_library_UNITY)
      # Combine the cc and cu source files.
      compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs} ${cu_cc_srcs}
                                   ${cudnn_cu_cc_srcs} ${mkldnn_cc_srcs})
      compose_unity_target_sources(${UNITY_TARGET} cu ${cudnn_cu_srcs}
                                   ${cu_srcs})
      if(TARGET ${UNITY_TARGET})
        # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
        target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources}
                                               ${unity_target_cu_sources})
      else()
        # If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files.
        nv_library(
          ${UNITY_TARGET}
          SRCS ${unity_target_cc_sources} ${unity_target_cu_sources}
          DEPS ${op_library_DEPS} ${op_common_deps})
      endif()
      # Add alias library to handle dependencies.
      add_library(${TARGET} ALIAS ${UNITY_TARGET})
    else()
      nv_library(
        ${TARGET}
        SRCS ${cc_srcs} ${cu_cc_srcs} ${cudnn_cu_cc_srcs} ${cudnn_cu_srcs}
             ${mkldnn_cc_srcs} ${cu_srcs}
        DEPS ${op_library_DEPS} ${op_common_deps})
    endif()
  elseif(WITH_ROCM)
    # Sources that are dropped from ROCm builds.
    list(REMOVE_ITEM miopen_cu_cc_srcs "affine_grid_cudnn_op.cu.cc")
    list(REMOVE_ITEM miopen_cu_cc_srcs "grid_sampler_cudnn_op.cu.cc")
    list(REMOVE_ITEM hip_srcs "cholesky_op.cu")
    list(REMOVE_ITEM hip_srcs "cholesky_solve_op.cu")
    list(REMOVE_ITEM hip_srcs "lu_op.cu")
    list(REMOVE_ITEM hip_srcs "matrix_rank_op.cu")
    list(REMOVE_ITEM hip_srcs "svd_op.cu")
    list(REMOVE_ITEM hip_srcs "eigvalsh_op.cu")
    list(REMOVE_ITEM hip_srcs "qr_op.cu")
    list(REMOVE_ITEM hip_srcs "eigh_op.cu")
    list(REMOVE_ITEM hip_srcs "lstsq_op.cu")
    list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
    list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu")
    hip_library(
      ${TARGET}
      SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs}
           ${mkldnn_cc_srcs} ${hip_srcs}
      DEPS ${op_library_DEPS} ${op_common_deps})
  elseif(WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0)
    xpu_library(
      ${TARGET}
      SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${xpu_kp_cc_srcs}
      DEPS ${op_library_DEPS} ${op_common_deps})
  else()
    # deal with CANN version control while registering NPU operators before build
    if(WITH_ASCEND_CL)
      if(CANN_VERSION LESS 504000)
        list(REMOVE_ITEM npu_cc_srcs "multinomial_op_npu.cc")
        list(REMOVE_ITEM npu_cc_srcs "take_along_axis_op_npu.cc")
      endif()
    endif()
    # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
    if(WITH_UNITY_BUILD AND op_library_UNITY)
      # Combine the cc source files.
      compose_unity_target_sources(
        ${UNITY_TARGET}
        cc
        ${cc_srcs}
        ${mkldnn_cc_srcs}
        ${xpu_cc_srcs}
        ${npu_cc_srcs}
        ${mlu_cc_srcs})
      if(TARGET ${UNITY_TARGET})
        # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
        target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources})
      else()
        # If `UNITY_TARGET` does not exist, create `UNITY_TARGET` with source files.
        cc_library(
          ${UNITY_TARGET}
          SRCS ${unity_target_cc_sources}
          DEPS ${op_library_DEPS} ${op_common_deps})
      endif()
      # Add alias library to handle dependencies.
      add_library(${TARGET} ALIAS ${UNITY_TARGET})
    else()
      cc_library(
        ${TARGET}
        SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${npu_cc_srcs}
             ${mlu_cc_srcs}
        DEPS ${op_library_DEPS} ${op_common_deps})
    endif()
  endif()

  # Lengths consulted by the pybind sections below. npu_cc_srcs may have
  # shrunk above, so they are measured here rather than earlier.
  list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
  list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
  list(LENGTH npu_cc_srcs npu_cc_srcs_len)
  list(LENGTH mlu_cc_srcs mlu_cc_srcs_len)

  # Define operators that don't need pybind here.
  foreach(
    manual_pybind_op
    "compare_all_op"
    "compare_op"
    "logical_op"
    "bitwise_op"
    "nccl_op"
    "tensor_array_read_write_op"
    "tensorrt_engine_op"
    "conv_fusion_op")
    if("${TARGET}" STREQUAL "${manual_pybind_op}")
      set(pybind_flag 1)
    endif()
  endforeach()

  # The registration of USE_OP, please refer to paddle/fluid/framework/op_registry.h.
  # Note that it's enough to just adding one operator to pybind in a *_op.cc file.
  # And for detail pybind information, please see generated paddle/pybind/pybind.h.
  #
  # Throughout the sections below an empty op_name means "no registration
  # found". The test is a quoted string comparison: an unquoted numeric
  # `EQUAL ""` only worked as a side effect of how if() parses an empty
  # expansion.
  set(ORIGINAL_TARGET ${TARGET})
  string(REGEX REPLACE "_op" "" TARGET "${TARGET}")

  foreach(cc_src ${cc_srcs})
    # pybind USE_OP_ITSELF
    set(op_name "")
    find_register(${cc_src} "REGISTER_OPERATOR" op_name)
    if(NOT "${op_name}" STREQUAL "")
      file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
      # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
      set(TARGET ${op_name})
      set(pybind_flag 1)
    endif()

    set(op_name "")
    find_register(${cc_src} "REGISTER_OP_WITHOUT_GRADIENT" op_name)
    if(NOT "${op_name}" STREQUAL "")
      file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
      # hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
      set(TARGET ${op_name})
      set(pybind_flag 1)
    endif()

    # pybind USE_OP_DEVICE_KERNEL for CPU
    set(op_name "")
    find_register(${cc_src} "REGISTER_OP_CPU_KERNEL" op_name)
    if(NOT "${op_name}" STREQUAL "")
      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CPU);\n")
      # why change TARGET here?
      # when building paddle with on_infer, the REGISTER_OPERATOR(*_grad) will be removed before compiling (see details in remove_grad_op_and_kernel.py)
      # in elementwise_op.cc, it will find REGISTER_OPERATOR(grad_add) and set TARGET to grad_add
      # and, in the following "mkldnn" part, it will add USE_OP_DEVICE_KERNEL(grad_add, MKLDNN) to pybind.h
      # however, grad_add has no mkldnn kernel.
      set(TARGET ${op_name})
      set(pybind_flag 1)
    endif()
  endforeach()

  # pybind USE_OP_DEVICE_KERNEL for CUDA
  list(APPEND cu_srcs ${cu_cc_srcs})
  foreach(cu_src ${cu_srcs})
    set(op_name "")
    find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name)
    if(NOT "${op_name}" STREQUAL "")
      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
      set(pybind_flag 1)
    endif()
  endforeach()

  # pybind USE_OP_DEVICE_KERNEL for ROCm
  list(APPEND hip_srcs ${hip_cc_srcs})
  foreach(hip_src ${hip_srcs})
    set(op_name "")
    find_register(${hip_src} "REGISTER_OP_CUDA_KERNEL" op_name)
    if(NOT "${op_name}" STREQUAL "")
      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
      set(pybind_flag 1)
    endif()
  endforeach()

  # pybind USE_OP_DEVICE_KERNEL for CUDNN/MIOPEN
  list(APPEND cudnn_cu_srcs ${cudnn_cu_cc_srcs})
  list(APPEND cudnn_cu_srcs ${miopen_cu_cc_srcs})
  list(APPEND cudnn_cu_srcs ${miopen_cu_srcs})
  list(LENGTH cudnn_cu_srcs cudnn_cu_srcs_len)
  if(${cudnn_cu_srcs_len} GREATER 0 AND "${ORIGINAL_TARGET}" STREQUAL
                                        "activation_op")
    file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, CUDNN);\n")
  else()
    foreach(cudnn_src ${cudnn_cu_srcs})
      set(op_name "")
      find_register(${cudnn_src} "REGISTER_OP_KERNEL" op_name)
      if(NOT "${op_name}" STREQUAL "")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDNN);\n")
        set(pybind_flag 1)
      endif()
    endforeach()
  endif()

  # pybind USE_OP_DEVICE_KERNEL for XPU
  if(WITH_XPU AND ${xpu_cc_srcs_len} GREATER 0)
    if("${ORIGINAL_TARGET}" STREQUAL "activation_op")
      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(relu, XPU);\n")
    else()
      foreach(xpu_src ${xpu_cc_srcs})
        set(op_name "")
        find_register(${xpu_src} "REGISTER_OP_XPU_KERNEL" op_name)
        if(NOT "${op_name}" STREQUAL "")
          file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n")
          set(pybind_flag 1)
        else()
          # Fall back to the functor-style registration macro.
          find_register(${xpu_src} "REGISTER_OP_XPU_KERNEL_FUNCTOR" op_name)
          if(NOT "${op_name}" STREQUAL "")
            file(APPEND ${pybind_file}
                 "USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n")
            set(pybind_flag 1)
          endif()
        endif()
      endforeach()
    endif()
  endif()

  # pybind USE_OP_DEVICE_KERNEL for XPU KP
  if(WITH_XPU_KP AND ${xpu_kp_cc_srcs_len} GREATER 0)
    foreach(xpu_kp_src ${xpu_kp_cc_srcs})
      set(op_name "")
      find_register(${xpu_kp_src} "REGISTER_OP_KERNEL" op_name)
      if(NOT "${op_name}" STREQUAL "")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, KP);\n")
        message(STATUS "Building KP Target: ${op_name}")
        set(pybind_flag 1)
      endif()
    endforeach()
  endif()

  # pybind USE_OP_DEVICE_KERNEL for NPU
  if(WITH_ASCEND_CL AND ${npu_cc_srcs_len} GREATER 0)
    foreach(npu_src ${npu_cc_srcs})
      set(op_name "")
      find_register(${npu_src} "REGISTER_OP_NPU_KERNEL" op_name)
      if(NOT "${op_name}" STREQUAL "")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, NPU);\n")
        set(pybind_flag 1)
      endif()
    endforeach()
  endif()

  # pybind USE_OP_DEVICE_KERNEL for MLU
  if(WITH_MLU AND ${mlu_cc_srcs_len} GREATER 0)
    foreach(mlu_src ${mlu_cc_srcs})
      set(op_name "")
      find_register(${mlu_src} "REGISTER_OP_MLU_KERNEL" op_name)
      if(NOT "${op_name}" STREQUAL "")
        file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, MLU);\n")
        set(pybind_flag 1)
      endif()
    endforeach()
  endif()

  # pybind USE_OP_DEVICE_KERNEL for MKLDNN
  if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
    # MKLDNN_FILE is only assigned on the auto-discovery path; it is empty
    # when SRCS was given explicitly, so it must be quoted here or the if()
    # would receive no left-hand operand.
    # Append first implemented MKLDNN activation operator
    if("${MKLDNN_FILE}" STREQUAL "activation_mkldnn_op")
      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(softplus, MKLDNN);\n")
    elseif("${MKLDNN_FILE}" STREQUAL "fc_mkldnn_op")
      file(APPEND ${pybind_file}
           "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, FP32);\n")
      file(APPEND ${pybind_file}
           "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, S8);\n")
      file(APPEND ${pybind_file}
           "USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, U8);\n")
    else()
      foreach(mkldnn_src ${mkldnn_cc_srcs})
        set(op_name "")
        find_register(${mkldnn_src} "REGISTER_OP_KERNEL" op_name)
        if(NOT "${op_name}" STREQUAL "")
          file(APPEND ${pybind_file}
               "USE_OP_DEVICE_KERNEL(${op_name}, MKLDNN);\n")
          set(pybind_flag 1)
        endif()
      endforeach()
    endif()
  endif()

  # pybind USE_NO_KERNEL_OP
  # HACK: if REGISTER_OP_CPU_KERNEL presents the operator must have kernel
  # NOTE(review): TARGET_CONTENT is not assigned anywhere in this function, so
  # unless a caller scope defines it the match below is always empty - confirm
  # whether it was meant to hold the operator's source text.
  string(REGEX MATCH "REGISTER_OP_CPU_KERNEL" regex_result "${TARGET_CONTENT}")
  string(REPLACE "_op" "" TARGET "${TARGET}")
  if(${pybind_flag} EQUAL 0 AND regex_result STREQUAL "")
    file(APPEND ${pybind_file} "USE_NO_KERNEL_OP(${TARGET});\n")
    set(pybind_flag 1)
  endif()

  # pybind USE_OP
  if(${pybind_flag} EQUAL 0)
    # NOTE(*): activation use macro to regist the kernels, set use_op manually.
    if("${TARGET}" STREQUAL "activation")
      file(APPEND ${pybind_file} "USE_OP_ITSELF(relu);\n")
    elseif("${TARGET}" STREQUAL "fake_dequantize")
      file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n")
    elseif("${TARGET}" STREQUAL "fake_quantize")
      file(APPEND ${pybind_file} "USE_OP(fake_quantize_abs_max);\n")
    elseif("${TARGET}" STREQUAL "tensorrt_engine_op")
      message(
        STATUS
          "Pybind skips [tensorrt_engine_op], for this OP is only used in inference"
      )
    else()
      file(APPEND ${pybind_file} "USE_OP(${TARGET});\n")
    endif()
  endif()
endfunction()
# register_operators([EXCLUDES <op>...] [DEPS <target>...])
#
# Calls op_library(<op> UNITY ...) for every operator that has a `*_op.cc`
# file in the current source directory, except those named in EXCLUDES.
# DEPS, when given, is forwarded to each op_library() call. Afterwards the
# unity targets are finalised if WITH_UNITY_BUILD is on.
function(register_operators)
  cmake_parse_arguments(register_operators "" "" "EXCLUDES;DEPS" ${ARGN})

  # Collect operator names: take every *_op.cc file, drop the backend
  # markers and the extension, then de-duplicate.
  file(
    GLOB OPS
    RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
    "*_op.cc")
  foreach(name_part "_mkldnn" "_xpu" "_npu" "_mlu" ".cc")
    string(REPLACE "${name_part}" "" OPS "${OPS}")
  endforeach()
  list(REMOVE_DUPLICATES OPS)

  # Arguments shared by every op_library() call.
  set(op_library_args UNITY)
  list(LENGTH register_operators_DEPS deps_count)
  if(${deps_count} GREATER 0)
    list(APPEND op_library_args DEPS ${register_operators_DEPS})
  endif()

  foreach(op_target ${OPS})
    if(NOT "${op_target}" IN_LIST register_operators_EXCLUDES)
      op_library(${op_target} ${op_library_args})
    endif()
  endforeach()

  # Complete the processing of `UNITY_TARGET`.
  if(WITH_UNITY_BUILD)
    finish_unity_target(cc)
    if(WITH_GPU)
      finish_unity_target(cu)
    endif()
  endif()
endfunction()
<?xml version="1.0" encoding="utf-8"?>
<Project xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<!-- Default item metadata for the CudaCompile and CudaLink item types, plus
     CUDA toolkit directories appended to the host Link and ClCompile items. -->
<ItemDefinitionGroup>
<CudaCompile>
<!-- Project schema: Host properties -->
<UseHostDefines>true</UseHostDefines>
<Emulation>false</Emulation>
<HostDebugInfo Condition="'$(Configuration)' == 'Debug'">true</HostDebugInfo>
<HostDebugInfo Condition="'$(Configuration)' != 'Debug'">false</HostDebugInfo>
<FastMath>false</FastMath>
<Optimization>InheritFromHost</Optimization>
<Runtime>InheritFromHost</Runtime>
<RuntimeChecks>InheritFromHost</RuntimeChecks>
<TypeInfo>InheritFromHost</TypeInfo>
<Warning>InheritFromHost</Warning>
<!-- Command-line templates. Bracketed names such as [Include] are
     placeholders; %(Name) refers to other metadata defined in this group. -->
<BaseCommandLineTemplate>-ccbin "%(VCBinDir)" -x cu [GenerateRelocatableDeviceCode] [Include] [RequiredIncludes] [InterleaveSourceInPTX] [GPUDebugInfo] [GenerateLineInfo] [Keep] [KeepDir] [MaxRegCount] [PtxAsOptionV] [TargetMachinePlatform] [NvccCompilation] [CudaRuntime] [AdditionalOptions]</BaseCommandLineTemplate>
<BuildCommandLineTemplate>--use-local-env $(CudaClVersion)</BuildCommandLineTemplate>
<BuildDynamicCommandLineTemplate>[CodeGeneration]</BuildDynamicCommandLineTemplate>
<CleanCommandLineTemplate>-clean</CleanCommandLineTemplate>
<!-- The active host template below differs from the commented-out variant
     only in that it omits /Zi. -->
<!-- <HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] $(CudaForceSynchronousPdbWrites) /Zi [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate> -->
<HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] $(CudaForceSynchronousPdbWrites) [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate>
<DriverApiCommandLineTemplate>%(BaseCommandLineTemplate) [CompileOut] "%(FullPath)"</DriverApiCommandLineTemplate>
<RuntimeApiCommandLineTemplate>%(BaseCommandLineTemplate) [HostDebugInfo] [Emulation] [FastMath] [Defines] %(HostCommandLineTemplate) [CompileOut] "%(FullPath)"</RuntimeApiCommandLineTemplate>
<CommandLineTemplate>
# (Approximate command-line. Settings inherited from host are not visible below.)
# (Please see the output window after a build for the full command-line)
# Driver API (NVCC Compilation Type is .cubin, .gpu, or .ptx)
set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(DriverApiCommandLineTemplate)
# Runtime API (NVCC Compilation Type is hybrid object or .c file)
set CUDAFE_FLAGS=--sdk_dir "$(WindowsSdkDir)"
"$(CudaToolkitNvccPath)" %(BuildCommandLineTemplate) %(RuntimeApiCommandLineTemplate)
</CommandLineTemplate>
<ExecutionDescription>Compiling CUDA source file %(Identity)...</ExecutionDescription>
<ExclusionDescription>Skipping CUDA source file %(Identity) (excluded from build).</ExclusionDescription>
<!-- Miscellaneous -->
<PropsCacheOutputFile>%(Filename)%(Extension).cache</PropsCacheOutputFile>
<PropsCacheOutputPath>$(IntDir)%(PropsCacheOutputFile)</PropsCacheOutputPath>
<CudaCompileCoreProject>$(MSBuildProjectFullPath)</CudaCompileCoreProject>
</CudaCompile>
<!-- Defaults for the device-link step (nvcc -dlink, see LinkCommandLineTemplate). -->
<CudaLink>
<PerformDeviceLink>true</PerformDeviceLink>
<LinkOut>$(IntDir)$(TargetName).device-link.obj</LinkOut>
<AdditionalLibraryDirectories></AdditionalLibraryDirectories>
<UseHostLibraryDirectories>true</UseHostLibraryDirectories>
<AdditionalDependencies></AdditionalDependencies>
<UseHostLibraryDependencies>true</UseHostLibraryDependencies>
<GPUDebugInfo>InheritFromProject</GPUDebugInfo>
<Optimization>InheritFromProject</Optimization>
<!-- Implicitly inherited from the project via @(CudaCompile) -->
<CodeGeneration></CodeGeneration>
<RuntimeChecks></RuntimeChecks>
<Runtime></Runtime>
<TargetMachinePlatform></TargetMachinePlatform>
<TypeInfo></TypeInfo>
<Warning></Warning>
<Inputs></Inputs>
<!-- As in CudaCompile, the active host template omits the /Zi switch that
     the commented-out variant carries. -->
<!-- <HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] /Zi [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate> -->
<HostCommandLineTemplate>-Xcompiler &quot;/EHsc [Warning] /nologo [Optimization] [RuntimeChecks] [Runtime] [TypeInfo]&quot;</HostCommandLineTemplate>
<LinkCommandLineTemplate>"$(CudaToolkitNvccPath)" -dlink [LinkOut] %(HostCommandLineTemplate) [AdditionalLibraryDirectories] [AdditionalDependencies] [AdditionalOptions] [CodeGeneration] [GPUDebugInfo] [TargetMachinePlatform] [Inputs]</LinkCommandLineTemplate>
<CommandLineTemplate>
# (Approximate command-line. Settings inherited from host are not visible below.)
# (Please see the output window after a build for the full command-line)
%(LinkCommandLineTemplate)
</CommandLineTemplate>
</CudaLink>
<!-- Append the CUDA toolkit library directory to the host linker search path. -->
<Link>
<AdditionalLibraryDirectories>%(AdditionalLibraryDirectories);$(CudaToolkitLibDir)</AdditionalLibraryDirectories>
</Link>
<!-- Append the CUDA toolkit include directory to the host compiler include path. -->
<ClCompile>
<AdditionalIncludeDirectories>%(AdditionalIncludeDirectories);$(CudaToolkitIncludeDir)</AdditionalIncludeDirectories>
</ClCompile>
</ItemDefinitionGroup>
</Project>
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# generate_unify_header(DIR_NAME [HEADER_NAME <name>] [SKIP_SUFFIX <suffix>]
#                       [EXCLUDES <file>...])
#
# Writes ${CMAKE_CURRENT_SOURCE_DIR}/include/<name>.h, a header that
# #includes every top-level `*.h` found in
# ${CMAKE_CURRENT_SOURCE_DIR}/<DIR_NAME>, and registers that generated header
# in ${phi_extension_header_file}.
#
#   DIR_NAME    - sub-directory to scan; also the default generated header name.
#   HEADER_NAME - overrides the generated header's base name.
#   SKIP_SUFFIX - headers whose path contains "<suffix>.h" are left out.
#   EXCLUDES    - header file names (without directory) to leave out.
#
# Include paths are written relative to PADDLE_SOURCE_DIR.
function(generate_unify_header DIR_NAME)
  cmake_parse_arguments(generate_unify_header "" "HEADER_NAME;SKIP_SUFFIX"
                        "EXCLUDES" ${ARGN})

  # Resolve the output header name and the optional skip suffix.
  if("${generate_unify_header_HEADER_NAME}" STREQUAL "")
    set(header_name "${DIR_NAME}")
  else()
    set(header_name "${generate_unify_header_HEADER_NAME}")
  endif()
  set(skip_suffix "${generate_unify_header_SKIP_SUFFIX}")

  # Start the generated header from scratch.
  set(header_file ${CMAKE_CURRENT_SOURCE_DIR}/include/${header_name}.h)
  file(
    WRITE ${header_file}
    "// Header file generated by paddle/phi/CMakeLists.txt for external users,\n// DO NOT edit or include it within paddle.\n\n#pragma once\n\n"
  )

  # Emit one #include per top-level header that is neither excluded by name
  # nor filtered out by the skip suffix.
  file(GLOB HEADERS "${CMAKE_CURRENT_SOURCE_DIR}\/${DIR_NAME}\/*.h")
  foreach(header_path ${HEADERS})
    get_filename_component(header_basename ${header_path} NAME)
    list(FIND generate_unify_header_EXCLUDES ${header_basename} exclude_pos)
    if(NOT ${exclude_pos} EQUAL -1)
      continue()
    endif()

    if(NOT "${skip_suffix}" STREQUAL "")
      string(FIND "${header_path}" "${skip_suffix}.h" suffix_pos)
      if(NOT ${suffix_pos} EQUAL -1)
        continue()
      endif()
    endif()

    string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" relative_header
                   "${header_path}")
    file(APPEND ${header_file} "#include \"${relative_header}\"\n")
  endforeach()

  # append header into extension.h
  string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" relative_header_file
                 "${header_file}")
  file(APPEND ${phi_extension_header_file}
       "#include \"${relative_header_file}\"\n")
endfunction()
# kernel_declare(TARGET_LIST)
#
# For every kernel source file in TARGET_LIST, finds the first
# PD_REGISTER_KERNEL / PD_REGISTER_GENERAL_KERNEL registration and appends the
# matching `PD_DECLARE_KERNEL(name, backend, layout);` line to
# ${kernel_declare_file}. Files without a registration are skipped.
#
# Callers need to make sure the listed input files exist.
function(kernel_declare TARGET_LIST)
  foreach(kernel_path ${TARGET_LIST})
    file(READ ${kernel_path} kernel_impl)
    string(
      REGEX
        MATCH
        "(PD_REGISTER_KERNEL|PD_REGISTER_GENERAL_KERNEL)\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z]*,[ \\\t\r\n]*[A-Z_]*"
        first_registry
        "${kernel_impl}")
    if(first_registry STREQUAL "")
      continue()
    endif()

    # some gpu kernel only can run on cuda, not support rocm, so we skip
    # registrations tagged `cuda_only` on ROCm and Jetson builds
    if(WITH_ROCM OR WITH_NV_JETSON)
      string(FIND "${first_registry}" "cuda_only" pos)
      if(pos GREATER 1)
        continue()
      endif()
    endif()

    # parse the registered kernel message: drop the macro prefix, turn the
    # comma-separated fields into a list, then strip blanks and the
    # `//cuda_only` marker
    set(kernel_fields "${first_registry}")
    foreach(macro_prefix "PD_REGISTER_KERNEL(" "PD_REGISTER_GENERAL_KERNEL(")
      string(REPLACE "${macro_prefix}" "" kernel_fields "${kernel_fields}")
    endforeach()
    string(REPLACE "," ";" kernel_fields "${kernel_fields}")
    string(REGEX REPLACE "[ \\\t\r\n]+" "" kernel_fields "${kernel_fields}")
    string(REGEX REPLACE "//cuda_only" "" kernel_fields "${kernel_fields}")
    list(GET kernel_fields 0 field_name)
    list(GET kernel_fields 1 field_backend)
    list(GET kernel_fields 2 field_layout)

    # append kernel declare into declarations.h
    file(
      APPEND ${kernel_declare_file}
      "PD_DECLARE_KERNEL(${field_name}, ${field_backend}, ${field_layout});\n"
    )
  endforeach()
endfunction()
# append_op_util_declare(TARGET)
#
# Reads ${CMAKE_CURRENT_SOURCE_DIR}/<TARGET>, finds its first
# PD_REGISTER_BASE_KERNEL_NAME(...) or PD_REGISTER_ARG_MAPPING_FN(...)
# registration and appends the corresponding PD_DECLARE_* line to
# ${op_utils_header}.
#
#   TARGET - file name relative to the current source directory.
#
# Files that contain neither macro are skipped; previously a dangling `);`
# line was appended for them.
function(append_op_util_declare TARGET)
  file(READ "${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}" target_content)
  string(
    REGEX
      MATCH
      "(PD_REGISTER_BASE_KERNEL_NAME|PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*"
      util_registrar
      "${target_content}")
  if("${util_registrar}" STREQUAL "")
    # Nothing registered in this file, so there is nothing to declare.
    return()
  endif()

  # Turn `PD_REGISTER_*(name` into `PD_DECLARE_*(name);`.
  string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN"
                 util_declare "${util_registrar}")
  string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME"
                 util_declare "${util_declare}")
  string(APPEND util_declare ");\n")
  file(APPEND ${op_utils_header} "${util_declare}")
endfunction()
# register_op_utils(TARGET_NAME [EXCLUDES <file>...] [DEPS <target>...])
#
# Builds the library TARGET_NAME from every `*_sig.cc` file in the current
# source directory and, for each of them, calls append_op_util_declare() to
# emit its declaration. DEPS is forwarded to cc_library(). EXCLUDES is
# accepted by the argument parser but not consulted in this function.
function(register_op_utils TARGET_NAME)
  cmake_parse_arguments(register_op_utils "" "" "EXCLUDES;DEPS" ${ARGN})

  file(
    GLOB SIGNATURES
    RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
    "*_sig.cc")

  set(utils_srcs)
  foreach(sig_file ${SIGNATURES})
    list(APPEND utils_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${sig_file})
    append_op_util_declare(${sig_file})
  endforeach()

  cc_library(
    ${TARGET_NAME}
    SRCS ${utils_srcs}
    DEPS ${register_op_utils_DEPS})
endfunction()
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Root of the inference install tree inside the build directory; the header
# post-processing below operates on paths underneath it.
set(PADDLE_INFERENCE_INSTALL_DIR
"${CMAKE_BINARY_DIR}/paddle_inference_install_dir")
# phi_header_path_compat(TARGET_PATH)
#
# Rewrites, in place, the include paths inside the `.h` files directly under
# TARGET_PATH so that they point into the installed layout:
#   paddle/phi/   -> paddle/include/experimental/phi/
#   paddle/utils/ -> paddle/include/experimental/utils/
#
# Nothing is touched unless TARGET_PATH contains "experimental" (at an offset
# greater than 1).
function(phi_header_path_compat TARGET_PATH)
  message(STATUS "phi header path compat processing: ${TARGET_PATH}")
  string(FIND "${TARGET_PATH}" "experimental" pos)
  if(pos GREATER 1)
    # NOTE(review): the second, relative "*.h" expression is globbed against
    # the current source directory rather than TARGET_PATH - confirm that is
    # intended.
    file(GLOB HEADERS "${TARGET_PATH}/*" "*.h")
    foreach(header ${HEADERS})
      # Only real `.h` files. The dot is escaped so that entries which merely
      # end in the letter "h" are not read and rewritten.
      if("${header}" MATCHES "\\.h$")
        file(READ ${header} HEADER_CONTENT)
        string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/"
                       HEADER_CONTENT "${HEADER_CONTENT}")
        string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/"
                       HEADER_CONTENT "${HEADER_CONTENT}")
        file(WRITE ${header} "${HEADER_CONTENT}")
        message(STATUS "phi header path compat processing complete: ${header}")
      endif()
    endforeach()
  endif()
endfunction()
# Rewrite the include paths in each installed experimental header directory,
# in the listed order.
foreach(
  compat_subdir
  experimental
  experimental/phi/api
  experimental/phi/api/ext
  experimental/phi/api/include
  experimental/phi/common
  experimental/phi/core)
  phi_header_path_compat(
    ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/${compat_subdir})
endforeach()

# In order to be compatible with the original behavior, the header file name needs to be changed
file(RENAME
     ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/extension.h
     ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/ext_all.h)
# Locate an installed Python module by importing it with PYTHON_EXECUTABLE.
# Found at http://www.cmake.org/pipermail/cmake/2011-January/041666.html
#
# Usage: find_python_module(PyQt4 REQUIRED)
#
# Results (<MODULE> is the upper-cased module name):
#   PY_<MODULE>          cache entry, location the module was imported from
#   PY_<MODULE>_VERSION  cache entry and parent-scope variable, set from the
#                        module's __version__ when that query succeeds
#   PY_<MODULE>_FOUND    parent-scope variable, forwarded from
#                        find_package_handle_standard_args
function(find_python_module module)
  string(TOUPPER ${module} module_upper)
  set(location_var PY_${module_upper})

  if(NOT ${location_var})
    # REQUIRED is only recognised as the second positional argument.
    set(${module}_FIND_REQUIRED FALSE)
    if(ARGC GREATER 1 AND ARGV1 STREQUAL "REQUIRED")
      set(${module}_FIND_REQUIRED TRUE)
    endif()

    # A module's location is usually a directory, but for binary modules
    # it's a .so file.
    execute_process(
      COMMAND
        "${PYTHON_EXECUTABLE}" "-c"
        "import re, ${module}; print(re.compile('/__init__.py.*').sub('',${module}.__file__))"
      OUTPUT_VARIABLE import_location
      RESULT_VARIABLE import_status
      ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
    # Exit status 0 means the import worked; remember where it came from.
    if(NOT import_status)
      set(${location_var}
          ${import_location}
          CACHE STRING "Location of Python module ${module}")
    endif()
  endif()

  find_package_handle_standard_args(PY_${module} DEFAULT_MSG ${location_var})
  if(NOT ${location_var}_FOUND AND ${module}_FIND_REQUIRED)
    message(FATAL_ERROR "python module ${module} is not found")
  endif()

  # Ask the module for its version; the query is attempted unconditionally.
  execute_process(
    COMMAND "${PYTHON_EXECUTABLE}" "-c"
            "import sys, ${module}; sys.stdout.write(${module}.__version__)"
    RESULT_VARIABLE version_status
    OUTPUT_VARIABLE version_text
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
  if(NOT version_status)
    set(${location_var}_VERSION
        ${version_text}
        CACHE STRING "Version of Python module ${module}")
  endif()

  # Hand the results back to the caller.
  set(${location_var}_FOUND
      ${${location_var}_FOUND}
      PARENT_SCOPE)
  set(${location_var}_VERSION
      ${${location_var}_VERSION}
      PARENT_SCOPE)
endfunction()
# Abort configuration unless py_version names Python 3.7 or newer.
#
# Arguments:
#   py_version - dotted version string with at least "major.minor",
#                e.g. "3.7" or "3.10.4".
#
# Raises FATAL_ERROR when fewer than two components are given or when the
# version is older than 3.7.
function(check_py_version py_version)
  string(REPLACE "." ";" version_list "${py_version}")
  list(LENGTH version_list version_list_len)
  if(version_list_len LESS 2)
    message(FATAL_ERROR "Please input Python version, eg:3.7 or 3.8 and so on")
  endif()
  # Compare as a version. Checking major and minor independently would
  # reject any future major release whose minor is below 7 (e.g. 4.0).
  if("${py_version}" VERSION_LESS "3.7")
    message(FATAL_ERROR "Paddle only support Python version >=3.7 now!")
  endif()
endfunction()
# RCCL detection: only meaningful for ROCm builds.
if(NOT WITH_ROCM)
  return()
endif()

# Now we don't support RCCL on windows
if(WIN32)
  return()
endif()

if(WITH_RCCL)
  set(RCCL_ROOT
      ${ROCM_PATH}/rccl
      CACHE PATH "RCCL ROOT")
  find_path(
    RCCL_INCLUDE_DIR rccl.h
    PATHS ${RCCL_ROOT} ${RCCL_ROOT}/include ${RCCL_ROOT}/local/include
          $ENV{RCCL_ROOT} $ENV{RCCL_ROOT}/include $ENV{RCCL_ROOT}/local/include
    NO_DEFAULT_PATH)

  # Without this check a failed search surfaces later as an obscure
  # "file failed to open" error on "RCCL_INCLUDE_DIR-NOTFOUND/rccl.h".
  if(NOT RCCL_INCLUDE_DIR)
    message(
      FATAL_ERROR
        "WITH_RCCL=ON but rccl.h was not found under RCCL_ROOT=${RCCL_ROOT} "
        "or \$ENV{RCCL_ROOT}. Please set RCCL_ROOT to the RCCL install prefix.")
  endif()

  # The version is read from the NCCL_VERSION_CODE define in rccl.h.
  file(READ "${RCCL_INCLUDE_DIR}/rccl.h" RCCL_VERSION_FILE_CONTENTS)
  string(REGEX MATCH "define NCCL_VERSION_CODE +([0-9]+)" RCCL_VERSION
               "${RCCL_VERSION_FILE_CONTENTS}")
  string(REGEX REPLACE "define NCCL_VERSION_CODE +([0-9]+)" "\\1" RCCL_VERSION
                       "${RCCL_VERSION}")
  # 2604 for ROCM3.5 and 2708 for ROCM 3.9
  message(STATUS "Current RCCL header is ${RCCL_INCLUDE_DIR}/rccl.h. "
                 "Current RCCL version is v${RCCL_VERSION}. ")
endif()
# This file is used to check every level of AVX support on your machine
# so that PaddlePaddle can unleash the vectorization power of multicore.
include(CheckCXXSourceRuns)
include(CheckCXXSourceCompiles)
# Choose the compiler switch that enables each instruction set.
# GCC/Clang spell it "-m<isa>" (lower case); MSVC spells it "/arch:<ISA>".
# No AVX512F flag is defined for MSVC here.
if(CMAKE_COMPILER_IS_GNUCC
   OR CMAKE_COMPILER_IS_GNUCXX
   OR CMAKE_CXX_COMPILER_ID MATCHES "Clang")
  foreach(_simd_isa MMX SSE2 SSE3 AVX AVX2 AVX512F)
    string(TOLOWER "${_simd_isa}" _simd_isa_lower)
    set(${_simd_isa}_FLAG "-m${_simd_isa_lower}")
  endforeach()
  unset(_simd_isa_lower)
elseif(MSVC)
  foreach(_simd_isa MMX SSE2 SSE3 AVX AVX2)
    set(${_simd_isa}_FLAG "/arch:${_simd_isa}")
  endforeach()
endif()
# Probe each instruction set by compiling AND running a tiny program with the
# matching <ISA>_FLAG. The outcome of each probe lands in <ISA>_FOUND.
# CMAKE_REQUIRED_FLAGS is saved first and restored once all probes are done.
set(CMAKE_REQUIRED_FLAGS_RETAINED ${CMAKE_REQUIRED_FLAGS})

# Probe program for MMX
set(_simd_probe_MMX
    "
#include <mmintrin.h>
int main()
{
_mm_setzero_si64();
return 0;
}")

# Probe program for SSE2
set(_simd_probe_SSE2
    "
#include <emmintrin.h>
int main()
{
_mm_setzero_si128();
return 0;
}")

# Probe program for SSE3
set(_simd_probe_SSE3
    "
#include <pmmintrin.h>
int main()
{
__m128d a = _mm_set1_pd(6.28);
__m128d b = _mm_set1_pd(3.14);
__m128d result = _mm_addsub_pd(a, b);
result = _mm_movedup_pd(result);
return 0;
}")

# Probe program for AVX
set(_simd_probe_AVX
    "
#include <immintrin.h>
int main()
{
__m256 a = _mm256_set_ps(-1.0f, 2.0f, -3.0f, 4.0f, -1.0f, 2.0f, -3.0f, 4.0f);
__m256 b = _mm256_set_ps(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
__m256 result = _mm256_add_ps(a, b);
return 0;
}")

# Probe program for AVX 2
set(_simd_probe_AVX2
    "
#include <immintrin.h>
int main()
{
__m256i a = _mm256_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4);
__m256i result = _mm256_abs_epi32 (a);
return 0;
}")

# Probe program for AVX512F
set(_simd_probe_AVX512F
    "
#include <immintrin.h>
int main()
{
__m512i a = _mm512_set_epi32 (-1, 2, -3, 4, -1, 2, -3, 4,
13, -5, 6, -7, 9, 2, -6, 3);
__m512i result = _mm512_abs_epi32 (a);
return 0;
}")

foreach(_simd_isa MMX SSE2 SSE3 AVX AVX2 AVX512F)
  set(CMAKE_REQUIRED_FLAGS ${${_simd_isa}_FLAG})
  # Forced on every configure, before the run check for this ISA.
  set(${_simd_isa}_FOUND_EXITCODE
      1
      CACHE STRING "Result from TRY_RUN" FORCE)
  check_cxx_source_runs("${_simd_probe_${_simd_isa}}" ${_simd_isa}_FOUND)
  unset(_simd_probe_${_simd_isa})
endforeach()

set(CMAKE_REQUIRED_FLAGS ${CMAKE_REQUIRED_FLAGS_RETAINED})
mark_as_advanced(MMX_FOUND SSE2_FOUND SSE3_FOUND AVX_FOUND AVX2_FOUND
                 AVX512F_FOUND)
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Detects the OS and sets appropriate variables.
# CMAKE_SYSTEM_NAME only gives us a coarse-grained name of the OS CMake is
# building for, but a finer host name such as "centos" is necessary
# in some scenes to distinguish systems for customization.
#
# for instance, protobuf libs path is <install_dir>/lib64
# on CentOS, but <install_dir>/lib on other systems.
#
# Variables set here:
#   LINUX               - TRUE on UNIX hosts other than Apple
#   HOST_SYSTEM         - win32 | macosx | centos | debian | ubuntu | redhat |
#                         fedora, falling back to CMAKE_SYSTEM_NAME
#   HOST_SYSTEM_VERSION - version text from sw_vers (macOS) or /etc/issue
#   CPU_CORES           - number of logical cores on the host
if(UNIX AND NOT APPLE)
  # except apple from nix*Os family
  set(LINUX TRUE)
endif()

if(WIN32)
  set(HOST_SYSTEM "win32")
elseif(APPLE)
  set(HOST_SYSTEM "macosx")
  # execute_process replaces the deprecated exec_program command, which newer
  # CMake releases reject (policy CMP0153).
  execute_process(
    COMMAND sw_vers -productVersion
    OUTPUT_VARIABLE HOST_SYSTEM_VERSION
    OUTPUT_STRIP_TRAILING_WHITESPACE)
  # Keep only "major.minor"; the dot is escaped so it matches a literal dot.
  string(REGEX MATCH "[0-9]+\\.[0-9]+" MACOS_VERSION "${HOST_SYSTEM_VERSION}")
  # Only provide a default when the user did not export
  # MACOSX_DEPLOYMENT_TARGET. (`if(NOT DEFINED $ENV{...})` tested a CMake
  # variable named after the *value* of the environment variable, so the
  # environment was never actually consulted.)
  if("$ENV{MACOSX_DEPLOYMENT_TARGET}" STREQUAL "")
    # Set cache variable - end user may change this during ccmake or cmake-gui configure.
    set(CMAKE_OSX_DEPLOYMENT_TARGET
        ${MACOS_VERSION}
        CACHE
          STRING
          "Minimum OS X version to target for deployment (at runtime); newer APIs weak linked. Set to empty string for default value."
    )
  endif()
  # NOTE(review): this overwrites any linker flags supplied by the user;
  # confirm whether appending was intended.
  set(CMAKE_EXE_LINKER_FLAGS "-framework CoreFoundation -framework Security")
else()
  if(EXISTS "/etc/issue")
    file(READ "/etc/issue" LINUX_ISSUE)
    if(LINUX_ISSUE MATCHES "CentOS")
      set(HOST_SYSTEM "centos")
    elseif(LINUX_ISSUE MATCHES "Debian")
      set(HOST_SYSTEM "debian")
    elseif(LINUX_ISSUE MATCHES "Ubuntu")
      set(HOST_SYSTEM "ubuntu")
    elseif(LINUX_ISSUE MATCHES "Red Hat")
      set(HOST_SYSTEM "redhat")
    elseif(LINUX_ISSUE MATCHES "Fedora")
      set(HOST_SYSTEM "fedora")
    endif()
    string(REGEX MATCH "(([0-9]+)\\.)+([0-9]+)" HOST_SYSTEM_VERSION
                 "${LINUX_ISSUE}")
  endif()

  # /etc/redhat-release takes precedence for recognising CentOS.
  if(EXISTS "/etc/redhat-release")
    file(READ "/etc/redhat-release" LINUX_ISSUE)
    if(LINUX_ISSUE MATCHES "CentOS")
      set(HOST_SYSTEM "centos")
    endif()
  endif()

  # Unknown distribution: fall back to the coarse CMake system name.
  if(NOT HOST_SYSTEM)
    set(HOST_SYSTEM ${CMAKE_SYSTEM_NAME})
  endif()
endif()

# query number of logical cores
cmake_host_system_information(RESULT CPU_CORES QUERY NUMBER_OF_LOGICAL_CORES)
mark_as_advanced(HOST_SYSTEM CPU_CORES)
message(
  STATUS
    "Found Paddle host system: ${HOST_SYSTEM}, version: ${HOST_SYSTEM_VERSION}")
message(STATUS "Found Paddle host system's CPU: ${CPU_CORES} cores")
# Log switches for external dependencies, expanded into ExternalProject_Add
# calls. Each LOG_<STEP> keyword is followed by 1 (wrap the step in a script
# to log its output) or 0.
set(EXTERNAL_PROJECT_LOG_ARGS
    LOG_DOWNLOAD 0 # download
    LOG_UPDATE 1 # update
    LOG_CONFIGURE 1 # configure
    LOG_BUILD 0 # build
    LOG_TEST 1 # test
    LOG_INSTALL 0 # install
)
# TensorRT detection. Only runs for GPU builds configured with
# -DWITH_TENSORRT=ON.
#
# Outputs: TENSORRT_FOUND, TENSORRT_INCLUDE_DIR, TENSORRT_LIBRARY_DIR,
# TENSORRT_LIBRARY and TENSORRT_{MAJOR,MINOR,PATCH,BUILD}_VERSION. When found,
# the include/link directories and -DPADDLE_WITH_TENSORRT are added.
if(NOT WITH_GPU OR NOT WITH_TENSORRT)
  return()
endif()

# Platform-specific library file names (and the default root off Windows).
if(WIN32)
  string(REPLACE "\\" "/" TENSORRT_ROOT "${TENSORRT_ROOT}")
  set(TR_INFER_LIB nvinfer.lib)
  set(TR_INFER_RT nvinfer.dll)
  set(TR_INFER_PLUGIN_RT nvinfer_plugin.dll)
else()
  set(TENSORRT_ROOT
      "/usr"
      CACHE PATH "TENSORRT ROOT")
  set(TR_INFER_LIB libnvinfer.a)
  set(TR_INFER_RT libnvinfer.so)
  set(TR_INFER_PLUGIN_RT libnvinfer_plugin.so)
endif()

find_path(
  TENSORRT_INCLUDE_DIR NvInfer.h
  PATHS ${TENSORRT_ROOT}
        ${TENSORRT_ROOT}/include
        ${TENSORRT_ROOT}/include/${CMAKE_LIBRARY_ARCHITECTURE}
        $ENV{TENSORRT_ROOT}
        $ENV{TENSORRT_ROOT}/include
        $ENV{TENSORRT_ROOT}/include/${CMAKE_LIBRARY_ARCHITECTURE}
  NO_DEFAULT_PATH)

find_path(
  TENSORRT_LIBRARY_DIR
  NAMES ${TR_INFER_LIB} ${TR_INFER_RT}
  PATHS ${TENSORRT_ROOT}
        ${TENSORRT_ROOT}/lib
        ${TENSORRT_ROOT}/lib/${CMAKE_LIBRARY_ARCHITECTURE}
        $ENV{TENSORRT_ROOT}
        $ENV{TENSORRT_ROOT}/lib
        $ENV{TENSORRT_ROOT}/lib/${CMAKE_LIBRARY_ARCHITECTURE}
  NO_DEFAULT_PATH
  DOC "Path to TensorRT library.")

find_library(
  TENSORRT_LIBRARY
  NAMES ${TR_INFER_LIB} ${TR_INFER_RT}
  PATHS ${TENSORRT_LIBRARY_DIR}
  NO_DEFAULT_PATH
  DOC "Path to TensorRT library.")

if(TENSORRT_INCLUDE_DIR AND TENSORRT_LIBRARY)
  set(TENSORRT_FOUND ON)
else()
  set(TENSORRT_FOUND OFF)
  message(
    WARNING
      "TensorRT is disabled. You are compiling PaddlePaddle with option -DWITH_TENSORRT=ON, but TensorRT is not found, please configure path to TensorRT with option -DTENSORRT_ROOT or install it."
  )
endif()

if(TENSORRT_FOUND)
  # The version macros are looked up in NvInfer.h first and in
  # NvInferVersion.h only when NvInfer.h does not define the major version.
  foreach(_trt_version_header NvInfer.h NvInferVersion.h)
    # Skip a header that is absent instead of failing inside file(READ).
    if(EXISTS "${TENSORRT_INCLUDE_DIR}/${_trt_version_header}")
      file(READ "${TENSORRT_INCLUDE_DIR}/${_trt_version_header}"
           TENSORRT_VERSION_FILE_CONTENTS)
      foreach(_trt_part MAJOR MINOR PATCH BUILD)
        string(REGEX MATCH "define NV_TENSORRT_${_trt_part} +([0-9]+)"
                     TENSORRT_${_trt_part}_VERSION
                     "${TENSORRT_VERSION_FILE_CONTENTS}")
      endforeach()
      if(NOT "${TENSORRT_MAJOR_VERSION}" STREQUAL "")
        break()
      endif()
    endif()
  endforeach()

  if("${TENSORRT_MAJOR_VERSION}" STREQUAL "")
    message(SEND_ERROR "Failed to detect TensorRT version.")
  endif()

  # Reduce each "define NV_TENSORRT_X <n>" match to the bare number.
  foreach(_trt_part MAJOR MINOR PATCH BUILD)
    string(REGEX REPLACE "define NV_TENSORRT_${_trt_part} +([0-9]+)" "\\1"
                         TENSORRT_${_trt_part}_VERSION
                         "${TENSORRT_${_trt_part}_VERSION}")
  endforeach()

  message(
    STATUS
      "Current TensorRT header is ${TENSORRT_INCLUDE_DIR}/NvInfer.h. "
      "Current TensorRT version is v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION} "
  )
  include_directories(${TENSORRT_INCLUDE_DIR})
  # link_directories expects a directory: pass the directory that holds the
  # library, not the path of the library file itself.
  link_directories(${TENSORRT_LIBRARY_DIR})
  add_definitions(-DPADDLE_WITH_TENSORRT)
endif()
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
include(ExternalProject)
# Create a target named "third_party", which can compile external dependencies
# on all platforms (windows/linux/mac). The settings below are shared by every
# external project.

# Number of processors on the build host, stored in NPROC.
include(ProcessorCount)
ProcessorCount(NPROC)

# External projects are always built in Release mode.
set(THIRD_PARTY_BUILD_TYPE Release)

# Start from an empty dependency list; each included dependency appends its
# extern_* target.
unset(third_party_deps)

set(THIRD_PARTY_PATH
    "${CMAKE_BINARY_DIR}/third_party"
    CACHE STRING
          "A path setting third party libraries download & build directories.")
set(THIRD_PARTY_CACHE_PATH
    "${CMAKE_SOURCE_DIR}"
    CACHE STRING
          "A path cache third party source code to avoid repeated download.")
# Cache function to avoid repeated downloads of third-party source code.
# Besides the target name it accepts 4 keyword parameters, URL / REPOSITORY /
# TAG / DIR:
# 1. URL:        specify download url of 3rd party
# 2. REPOSITORY: specify git REPOSITORY of 3rd party
# 3. TAG:        specify git tag/branch/commitID of 3rd party
# 4. DIR:        name of the caller's SOURCE_DIR variable, which is overwritten
#                with the cache directory when -DWITH_TP_CACHE=ON
#
# The function returns 1 PARENT_SCOPE variable:
# - ${TARGET}_DOWNLOAD_CMD: Simply place "${TARGET}_DOWNLOAD_CMD" in ExternalProject_Add,
#                           and you no longer need to set any download steps in ExternalProject_Add.
#   (${TARGET} here is the target name without the "extern_" prefix and
#   without digits, upper-cased.)
# For example:
#    Cache_third_party(${TARGET}
#            REPOSITORY ${TARGET_REPOSITORY}
#            TAG        ${TARGET_TAG}
#            DIR        ${TARGET_SOURCE_DIR})
function(cache_third_party TARGET)
  set(options "")
  set(oneValueArgs URL REPOSITORY TAG DIR)
  set(multiValueArgs "")
  # Pass "${options}" (the original spelled it "${optionps}", an undefined name).
  cmake_parse_arguments(cache_third_party "${options}" "${oneValueArgs}"
                        "${multiValueArgs}" ${ARGN})

  # extern_foo2 -> FOO
  string(REPLACE "extern_" "" TARGET_NAME "${TARGET}")
  string(REGEX REPLACE "[0-9]+" "" TARGET_NAME "${TARGET_NAME}")
  string(TOUPPER "${TARGET_NAME}" TARGET_NAME)

  # A git repository wins over a plain URL when both are given.
  if(cache_third_party_REPOSITORY)
    set(${TARGET_NAME}_DOWNLOAD_CMD GIT_REPOSITORY
                                    ${cache_third_party_REPOSITORY})
    if(cache_third_party_TAG)
      list(APPEND ${TARGET_NAME}_DOWNLOAD_CMD GIT_TAG ${cache_third_party_TAG})
    endif()
  elseif(cache_third_party_URL)
    set(${TARGET_NAME}_DOWNLOAD_CMD URL ${cache_third_party_URL})
  else()
    message(
      FATAL_ERROR "Download link (Git repo or URL) must be specified for cache!"
    )
  endif()

  if(WITH_TP_CACHE)
    if(NOT cache_third_party_DIR)
      message(
        FATAL_ERROR
          "Please input the ${TARGET_NAME}_SOURCE_DIR for overwriting when -DWITH_TP_CACHE=ON"
      )
    endif()
    # Generate and verify cache dir for third_party source code
    set(cache_third_party_REPOSITORY ${cache_third_party_REPOSITORY}
                                     ${cache_third_party_URL})
    # The inputs of string(MD5) are quoted so that a value holding both a
    # repository and a URL is hashed as one string instead of being split
    # into several arguments.
    if(cache_third_party_REPOSITORY AND cache_third_party_TAG)
      string(MD5 HASH_REPO "${cache_third_party_REPOSITORY}")
      string(MD5 HASH_GIT "${cache_third_party_TAG}")
      string(SUBSTRING "${HASH_REPO}" 0 8 HASH_REPO)
      string(SUBSTRING "${HASH_GIT}" 0 8 HASH_GIT)
      string(CONCAT HASH "${HASH_REPO}" "${HASH_GIT}")
      # overwrite the original SOURCE_DIR when cache directory
      set(${cache_third_party_DIR}
          ${THIRD_PARTY_CACHE_PATH}/third_party/${TARGET}_${HASH})
    elseif(cache_third_party_REPOSITORY)
      string(MD5 HASH_REPO "${cache_third_party_REPOSITORY}")
      string(SUBSTRING "${HASH_REPO}" 0 16 HASH)
      # overwrite the original SOURCE_DIR when cache directory
      set(${cache_third_party_DIR}
          ${THIRD_PARTY_CACHE_PATH}/third_party/${TARGET}_${HASH})
    endif()

    if(EXISTS ${${cache_third_party_DIR}})
      # judge whether the cache dir is empty
      file(GLOB files ${${cache_third_party_DIR}}/*)
      list(LENGTH files files_len)
      if(files_len GREATER 0)
        # A populated cache directory means nothing has to be downloaded.
        list(APPEND ${TARGET_NAME}_DOWNLOAD_CMD DOWNLOAD_COMMAND "")
      endif()
    endif()
    set(${cache_third_party_DIR}
        ${${cache_third_party_DIR}}
        PARENT_SCOPE)
  endif()

  # Pass ${TARGET_NAME}_DOWNLOAD_CMD to parent scope, the double quotation marks can't be removed
  set(${TARGET_NAME}_DOWNLOAD_CMD
      "${${TARGET_NAME}_DOWNLOAD_CMD}"
      PARENT_SCOPE)
endfunction()
# Forget VAR_NAME completely: drop its cache entry first, then the normal
# variable in the scope the macro is invoked from.
macro(UNSET_VAR VAR_NAME)
  unset(${VAR_NAME} CACHE)
  # set() without a value is equivalent to unset() for a normal variable.
  set(${VAR_NAME})
endmacro()
# Function to download a dependency during compilation.
# Positional parameters:
# 1. URL:  the download url of the 3rd-party dependency
# 2. NAME: the name of the file, which determines the directory name
# Optional keyword:
#    MD5 <checksum>: passed to ExternalProject_Add as URL_MD5
#
# Side effects in the caller's scope:
# - ${NAME}_INCLUDE_DIR is set to ${THIRD_PARTY_PATH}/${NAME}/data
# - download_${NAME} is appended to third_party_deps
function(file_download_and_uncompress URL NAME)
  # The "URL" prefix means the MD5 keyword value arrives as URL_MD5.
  cmake_parse_arguments(URL "" "MD5" "" ${ARGN})
  message(STATUS "Download dependence[${NAME}] from ${URL}, MD5: ${URL_MD5}")

  set(download_prefix ${THIRD_PARTY_PATH}/${NAME})
  set(${NAME}_INCLUDE_DIR
      ${download_prefix}/data
      PARENT_SCOPE)

  # Download-only project: every later step is an empty command.
  ExternalProject_Add(
    download_${NAME}
    ${EXTERNAL_PROJECT_LOG_ARGS}
    PREFIX ${download_prefix}
    URL ${URL}
    URL_MD5 ${URL_MD5}
    TIMEOUT 120
    DOWNLOAD_DIR ${download_prefix}/data/
    SOURCE_DIR ${download_prefix}/data/
    DOWNLOAD_NO_PROGRESS 1
    CONFIGURE_COMMAND ""
    BUILD_COMMAND ""
    UPDATE_COMMAND ""
    INSTALL_COMMAND "")

  list(APPEND third_party_deps download_${NAME})
  set(third_party_deps
      ${third_party_deps}
      PARENT_SCOPE)
endfunction()
# Correct option values that are not supported on a given platform (WIN/MAC)
# and print a message for every option that is forced off.
if(APPLE AND WITH_MKL)
  message(
    WARNING "Mac is not supported with MKL in Paddle yet. Force WITH_MKL=OFF."
  )
  set(WITH_MKL
      OFF
      CACHE STRING "Disable MKL for building on mac" FORCE)
endif()

if(WIN32 OR APPLE)
  # XBYAK is switched off unconditionally on these platforms.
  message(STATUS "Disable XBYAK in Windows and MacOS")
  set(WITH_XBYAK
      OFF
      CACHE STRING "Disable XBYAK in Windows and MacOS" FORCE)

  if(WITH_LIBXSMM)
    message(WARNING "Windows, Mac are not supported with libxsmm in Paddle yet."
                    "Force WITH_LIBXSMM=OFF")
    set(WITH_LIBXSMM
        OFF
        CACHE STRING "Disable LIBXSMM in Windows and MacOS" FORCE)
  endif()

  # These packages share the same warning and cache-description wording.
  foreach(_unsupported_pkg BOX_PS PSLIB ARM_BRPC LIBMCT PSLIB_BRPC)
    if(WITH_${_unsupported_pkg})
      message(
        WARNING
          "Windows or Mac is not supported with ${_unsupported_pkg} in Paddle yet."
          "Force WITH_${_unsupported_pkg}=OFF")
      set(WITH_${_unsupported_pkg}
          OFF
          CACHE STRING
                "Disable ${_unsupported_pkg} package in Windows and MacOS" FORCE)
    endif()
  endforeach()
endif()
# MKLML follows the MKL switch.
set(WITH_MKLML ${WITH_MKL})

# Unless the user decided explicitly, MKL-DNN is enabled only when MKL is on
# and the AVX2 run check succeeded.
if(NOT DEFINED WITH_MKLDNN)
  if(NOT WITH_MKL OR NOT AVX2_FOUND)
    message(STATUS "Do not have AVX2 intrinsics and disabled MKL-DNN.")
    set(WITH_MKLDNN OFF)
  else()
    set(WITH_MKLDNN ON)
  endif()
endif()

# DGC is turned off on Windows, on macOS, without GPU support, and for
# inference builds that have no Python.
if(WIN32 OR APPLE OR NOT WITH_GPU OR (ON_INFER AND NOT WITH_PYTHON))
  set(WITH_DGC OFF)
endif()

if(CMAKE_VERSION VERSION_GREATER "3.5.2")
  # adds --depth=1 arg to git clone of External_Projects
  set(SHALLOW_CLONE "GIT_SHALLOW TRUE")
endif()
########################### include third_party according to flags ###############################
# Dependencies that are always included.
include(external/zlib) # download, build, install zlib
include(external/gflags) # download, build, install gflags
include(external/glog) # download, build, install glog
include(external/eigen) # download eigen3
include(external/threadpool) # download threadpool
include(external/dlpack) # download dlpack
include(external/xxhash) # download, build, install xxhash
include(external/warpctc) # download, build, install warpctc
include(external/utf8proc) # download, build, install utf8proc

list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog
     extern_xxhash)
list(
  APPEND
  third_party_deps
  extern_zlib
  extern_dlpack
  extern_warpctc
  extern_threadpool
  extern_utf8proc)

include(external/lapack) # download, build, install lapack
# Only lapack is new at this point; the targets listed above are already in
# third_party_deps and are not appended a second time.
list(APPEND third_party_deps extern_lapack)

include(cblas) # find first, then download, build, install openblas

message(STATUS "CBLAS_PROVIDER: ${CBLAS_PROVIDER}")
# Quoted so that an empty CBLAS_PROVIDER does not turn if() into a syntax error.
if("${CBLAS_PROVIDER}" STREQUAL "MKLML")
  list(APPEND third_party_deps extern_mklml)
elseif("${CBLAS_PROVIDER}" STREQUAL "EXTERN_OPENBLAS")
  list(APPEND third_party_deps extern_openblas)
endif()

if(WITH_MKLDNN)
  include(external/mkldnn) # download, build, install mkldnn
  list(APPEND third_party_deps extern_mkldnn)
endif()

include(external/protobuf) # find first, then download, build, install protobuf
# extern_protobuf is only appended when the include created that target.
if(TARGET extern_protobuf)
  list(APPEND third_party_deps extern_protobuf)
endif()

if(WITH_PYTHON)
  include(external/python) # find python and python_module
  include(external/pybind11) # download pybind11
  list(APPEND third_party_deps extern_pybind)
endif()

if(WITH_TESTING OR WITH_DISTRIBUTE)
  include(external/gtest) # download, build, install gtest
  list(APPEND third_party_deps extern_gtest)
endif()

if(WITH_ONNXRUNTIME)
  # download, build, install onnxruntime and paddle2onnx
  include(external/onnxruntime)
  include(external/paddle2onnx)
  list(APPEND third_party_deps extern_onnxruntime extern_paddle2onnx)
endif()
# GPU-only dependencies: cub (for some CUDA versions) and the external error
# message archive.
if(WITH_GPU)
  # Compare as versions. Numeric LESS/GREATER_EQUAL on a dotted string such as
  # "11.2.152" is not a version comparison (a minor of 10 would read as .1).
  if("${CMAKE_CUDA_COMPILER_VERSION}" VERSION_LESS "11.0"
     OR "${CMAKE_CUDA_COMPILER_VERSION}" VERSION_GREATER_EQUAL "11.6")
    include(external/cub) # download cub
    list(APPEND third_party_deps extern_cub)
  endif()

  set(URL
      "https://paddlepaddledeps.bj.bcebos.com/externalErrorMsg_20210928.tar.gz"
      CACHE STRING "" FORCE)
  # download file externalErrorMsg.tar.gz
  file_download_and_uncompress(${URL} "externalError" MD5
                               a712a49384e77ca216ad866712f7cafa)

  if(WITH_TESTING)
    # copy externalErrorMsg.pb, just for unittest can get error message correctly.
    set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data)
    if(WIN32 AND (NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja"))
      set(DST_DIR1
          ${CMAKE_BINARY_DIR}/paddle/fluid/third_party/externalError/data)
    else()
      set(DST_DIR1 ${CMAKE_BINARY_DIR}/paddle/third_party/externalError/data)
    endif()
    set(DST_DIR2
        ${CMAKE_BINARY_DIR}/python/paddle/include/third_party/externalError/data
    )
    # The comment names both destinations; the original referenced an
    # undefined ${DST_DIR} and printed an empty path.
    add_custom_command(
      TARGET download_externalError
      POST_BUILD
      COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR1}
      COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR2}
      COMMENT "copy_directory from ${SRC_DIR} to ${DST_DIR1} and ${DST_DIR2}")
  endif()
endif()
if(WITH_XPU)
  include(external/xpu) # download, build, install xpu
  list(APPEND third_party_deps extern_xpu)
endif()

if(WITH_MLU)
  include(external/concurrentqueue) # download, build, install concurrentqueue
  list(APPEND third_party_deps extern_concurrentqueue)
endif()

# PSLIB and the packages it pulls in.
if(WITH_PSLIB)
  include(external/pslib) # download, build, install pslib
  list(APPEND third_party_deps extern_pslib)
  if(WITH_LIBMCT)
    include(external/libmct) # download, build, install libmct
    # Depend on the target of the module included just above. The original
    # appended extern_libxsmm here, which belongs to WITH_LIBXSMM.
    list(APPEND third_party_deps extern_libmct)
  endif()
  if(WITH_PSLIB_BRPC)
    include(external/pslib_brpc) # download, build, install pslib_brpc
    list(APPEND third_party_deps extern_pslib_brpc)
  else()
    # Without pslib_brpc: snappy and leveldb, plus brpc unless WITH_HETERPS.
    include(external/snappy)
    list(APPEND third_party_deps extern_snappy)

    include(external/leveldb)
    list(APPEND third_party_deps extern_leveldb)
    if(NOT WITH_HETERPS)
      include(external/brpc)
      list(APPEND third_party_deps extern_brpc)
    endif()
  endif()
endif()
# Remaining optional dependencies. Every include keeps its original position
# and is followed directly by the append of its extern_* target.

# gloo is included everywhere except Windows and macOS.
if(NOT (WIN32 OR APPLE))
  include(external/gloo)
  list(APPEND third_party_deps extern_gloo)
endif()

if(WITH_BOX_PS)
  include(external/box_ps)
  list(APPEND third_party_deps extern_box_ps)
endif()

if(WITH_ASCEND OR WITH_ASCEND_CL)
  include(external/ascend)
  # The options are tested again after the include, as in the original.
  if(WITH_ASCEND OR WITH_ASCEND_CL)
    list(APPEND third_party_deps extern_ascend)
  endif()
  if(WITH_ASCEND_CL)
    list(APPEND third_party_deps extern_ascend_cl)
  endif()
endif()

# Parameter-server core: snappy, leveldb, one brpc flavour, libmct, rocksdb.
if(WITH_PSCORE)
  include(external/snappy)
  list(APPEND third_party_deps extern_snappy)

  include(external/leveldb)
  list(APPEND third_party_deps extern_leveldb)

  if(NOT WITH_ARM_BRPC)
    include(external/brpc)
    list(APPEND third_party_deps extern_brpc)
  else()
    include(external/arm_brpc)
    list(APPEND third_party_deps extern_arm_brpc)
  endif()

  include(external/libmct) # download, build, install libmct
  list(APPEND third_party_deps extern_libmct)

  include(external/rocksdb) # download, build, install rocksdb
  list(APPEND third_party_deps extern_rocksdb)
endif()

if(WITH_XBYAK)
  include(external/xbyak) # download, build, install xbyak
  list(APPEND third_party_deps extern_xbyak)
endif()

if(WITH_LIBXSMM)
  include(external/libxsmm) # download, build, install libxsmm
  list(APPEND third_party_deps extern_libxsmm)
endif()

if(WITH_DGC)
  message(STATUS "add dgc lib.")
  include(external/dgc) # download, build, install dgc
  add_definitions(-DPADDLE_WITH_DGC)
  list(APPEND third_party_deps extern_dgc)
endif()

# Lite only includes its module; nothing is appended to third_party_deps.
if(WITH_LITE)
  message(STATUS "Compile Paddle with Lite Engine.")
  include(external/lite)
endif()

if(WITH_CINN)
  message(STATUS "Compile Paddle with CINN.")
  include(external/cinn)
  add_definitions(-DPADDLE_WITH_CINN)
  if(WITH_GPU)
    add_definitions(-DCINN_WITH_CUDA -DCINN_WITH_CUDNN)
  endif()
  if(WITH_MKL)
    add_definitions(-DCINN_WITH_MKL_CBLAS -DCINN_WITH_MKLDNN)
  endif()
endif()

if(WITH_CRYPTO)
  include(external/cryptopp) # download, build, install cryptopp
  list(APPEND third_party_deps extern_cryptopp)
  add_definitions(-DPADDLE_WITH_CRYPTO)
endif()

if(WITH_POCKETFFT)
  include(external/pocketfft)
  list(APPEND third_party_deps extern_pocketfft)
  add_definitions(-DPADDLE_WITH_POCKETFFT)
endif()

if(WIN32)
  include(external/dirent)
  list(APPEND third_party_deps extern_dirent)
endif()

if(WITH_INFRT)
  include(external/llvm)
  list(APPEND third_party_deps ${llvm_libs})
endif()

if(WITH_IPU)
  include(external/poplar)
  list(APPEND third_party_deps extern_poplar)
endif()

if(WITH_CUSPARSELT)
  include(external/cusparselt) # download, build, install cusparselt
  list(APPEND third_party_deps extern_cusparselt)
endif()

# Umbrella target that depends on everything collected in third_party_deps.
add_custom_target(third_party ALL DEPENDS ${third_party_deps})
# Write a small CUDA program that includes thrust/version.h and
# thrust/shuffle.h, then build and run it with nvcc. If that does not exit
# with status 0, ${PADDLE_SOURCE_DIR}/patches/thrust is added to the include
# directories.
function(add_thrust_patches_if_necessary)
  set(thrust_detect_file ${PROJECT_BINARY_DIR}/detect_thrust.cu)

  # Assemble the probe source first, then write it out in one piece.
  string(
    CONCAT thrust_probe_source
           "#include \"thrust/version.h\"\n"
           "#include \"thrust/shuffle.h\"\n"
           "#include \"stdio.h\"\n"
           "int main() {\n"
           " int version = THRUST_VERSION;\n"
           " printf(\"%d\", version);\n"
           " return 0;\n"
           "}\n")
  file(WRITE ${thrust_detect_file} "${thrust_probe_source}")

  execute_process(
    COMMAND "${CUDA_NVCC_EXECUTABLE}" "--run" "${thrust_detect_file}"
    WORKING_DIRECTORY "${PROJECT_BINARY_DIR}/CMakeFiles/"
    RESULT_VARIABLE nvcc_res
    ERROR_QUIET)

  # Exit status 0: the probe ran, so no patch directory is needed.
  if(nvcc_res EQUAL 0)
    return()
  endif()

  set(thrust_patches "${PADDLE_SOURCE_DIR}/patches/thrust")
  message(STATUS "Add thrust patches: ${thrust_patches}")
  include_directories(${thrust_patches})
endfunction()

add_thrust_patches_if_necessary()
# Code placed before all includes of a unity source file to avoid compilation
# failure. UNITY_CC_BEFORE_CODE is the cc prologue, UNITY_CU_BEFORE_CODE the
# cu prologue.
set(UNITY_CC_BEFORE_CODE
    [[
#ifndef NOMINMAX
#define NOMINMAX
#endif
#ifndef _USE_MATH_DEFINES
#define _USE_MATH_DEFINES
#endif]])
set(UNITY_CU_BEFORE_CODE
    [[
#ifndef __CUDACC_VER_MAJOR__
#define __CUDACC_VER_MAJOR__ CUDA_COMPILER_MAJOR_VERSION
#endif
#ifndef __CUDACC_VER_MINOR__
#define __CUDACC_VER_MINOR__ CUDA_COMPILER_MINOR_VERSION
#endif]])

if(WITH_GPU)
  # Split CMAKE_CUDA_COMPILER_VERSION into components, then substitute the
  # first two (major, minor) for their placeholders in the cu prologue.
  string(REPLACE "." ";" CUDA_COMPILER_VERSION ${CMAKE_CUDA_COMPILER_VERSION})
  set(_unity_cuda_part_index 0)
  foreach(_unity_cuda_part MAJOR MINOR)
    list(GET CUDA_COMPILER_VERSION ${_unity_cuda_part_index}
         CUDA_COMPILER_${_unity_cuda_part}_VERSION)
    string(
      REPLACE "CUDA_COMPILER_${_unity_cuda_part}_VERSION"
              ${CUDA_COMPILER_${_unity_cuda_part}_VERSION}
              UNITY_CU_BEFORE_CODE ${UNITY_CU_BEFORE_CODE})
    math(EXPR _unity_cuda_part_index "${_unity_cuda_part_index} + 1")
  endforeach()
  unset(_unity_cuda_part_index)
endif()
# Register one group of sources that may be compiled together as a single
# unity file for the current directory.
#
#   register_unity_group(<cc|cu> <src>...)
#
# TYPE selects the source kind (cc or cu); it is used both in the bookkeeping
# property names and as the extension of the unity file. The listed sources are
# only a grouping rule and do not have to exist. Relative paths are resolved
# against CMAKE_CURRENT_SOURCE_DIR. Groups are numbered per target and type in
# registration order, using GLOBAL properties as storage.
function(register_unity_group TYPE)
  # The target name is the directory below paddle/fluid with "/" turned into "_".
  string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" unity_name
                 ${CMAKE_CURRENT_SOURCE_DIR})
  string(REPLACE "/" "_" unity_name ${unity_name})
  set(unity_name "paddle_${unity_name}_unity")

  # Number of groups registered so far for this target/type; unset means none.
  set(index_property ${unity_name}_${TYPE}_group_index)
  get_property(group_no GLOBAL PROPERTY ${index_property})
  if("${group_no}" STREQUAL "")
    set(group_no 0)
  endif()

  # Store the absolute path of every member under this group's property.
  set(sources_property ${unity_name}_${TYPE}_group_${group_no}_sources)
  set_property(GLOBAL PROPERTY ${sources_property} "")
  foreach(member ${ARGN})
    if(IS_ABSOLUTE ${member})
      set(member_path ${member})
    else()
      set(member_path ${CMAKE_CURRENT_SOURCE_DIR}/${member})
    endif()
    set_property(GLOBAL APPEND PROPERTY ${sources_property} ${member_path})
  endforeach()

  # nv_library / cc_library fall back to a dummy file when the unity file is
  # missing, so make sure it exists (an existing file is left untouched).
  set(group_file
      ${CMAKE_CURRENT_BINARY_DIR}/${unity_name}_${group_no}_${TYPE}.${TYPE})
  if(NOT EXISTS ${group_file})
    file(TOUCH ${group_file})
  endif()

  # Publish the incremented group count for the next registration.
  math(EXPR next_group_no "${group_no} + 1")
  set_property(GLOBAL PROPERTY ${index_property} ${next_group_no})
endfunction()
# Combine the original source files used by `TARGET`, then use
# `unity_target_${TYPE}_sources` to get the combined source files.
# If a source file does not hit any registered group, it is used as is.
# This function records the actual combination relationship in GLOBAL
# properties instead of writing the unity source file. The reason is that
# writing the unity source file would change its timestamp and defeat the
# benefit of retaining the build directory on Windows.
# Here you need to specify the source type which belongs to cc or cu.
#
#   compose_unity_target_sources(<target> <cc|cu> <src>...)
#
# Output: `unity_target_${TYPE}_sources` is set in the caller's scope. A source
# that belongs to a registered group is replaced by that group's unity file
# path; the path is appended once per matching source, so it can appear more
# than once in the result.
function(compose_unity_target_sources TARGET TYPE)
# Variable unity_target_sources represents the source file used in TARGET
set(unity_target_sources "")
# Number of groups registered for TARGET/TYPE (empty when none were registered).
get_property(unity_group_index_max GLOBAL
PROPERTY ${TARGET}_${TYPE}_group_index)
foreach(src ${ARGN})
set(unity_file "")
# Note(zhouwei25): UB use the path relative to CMAKE_SOURCE_DIR.
# If use absolute path, sccache/ccache hit rate will be reduced.
if(IS_ABSOLUTE ${src})
set(src_absolute_path ${src})
file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR} ${src})
else()
set(src_absolute_path ${CMAKE_CURRENT_SOURCE_DIR}/${src})
file(RELATIVE_PATH src_relative_path ${CMAKE_SOURCE_DIR}
${src_absolute_path})
endif()
# If `unity_group_index_max` is empty, there is no combination
# relationship.
# TODO(Avin0323): Whether use target property `UNITY_BUILD` of CMAKE to
# combine source files.
if(NOT "${unity_group_index_max}" STREQUAL "")
# Search in each registered group.
# foreach(RANGE n) also yields n itself, hence the explicit break at the top.
foreach(unity_group_index RANGE ${unity_group_index_max})
if(${unity_group_index} GREATER_EQUAL ${unity_group_index_max})
break()
endif()
get_property(
unity_group_sources GLOBAL
PROPERTY ${TARGET}_${TYPE}_group_${unity_group_index}_sources)
if(${src_absolute_path} IN_LIST unity_group_sources)
set(unity_file
${CMAKE_CURRENT_BINARY_DIR}/${TARGET}_${unity_group_index}_${TYPE}.${TYPE}
)
# Property that accumulates the lines of this group's unity file.
set(unity_file_sources
${TARGET}_${TYPE}_file_${unity_group_index}_sources)
# `SET` asks whether the property has been set yet, i.e. whether the
# preamble has already been emitted for this unity file.
get_property(
set_unity_file_sources GLOBAL
PROPERTY ${unity_file_sources}
SET)
if(NOT ${set_unity_file_sources})
# Add macro before include source files.
set_property(GLOBAL PROPERTY ${unity_file_sources}
"// Generate by Unity Build")
set_property(GLOBAL APPEND PROPERTY ${unity_file_sources}
${UNITY_CC_BEFORE_CODE})
if(WITH_GPU AND "${TYPE}" STREQUAL "cu")
set_property(GLOBAL APPEND PROPERTY ${unity_file_sources}
${UNITY_CU_BEFORE_CODE})
endif()
endif()
# Record the include line for this source, then stop at the first group
# that contains it.
set_property(
GLOBAL APPEND PROPERTY ${unity_file_sources}
"#include \"${src_relative_path}\"")
set(unity_target_sources ${unity_target_sources} ${unity_file})
break()
endif()
endforeach()
endif()
# Use original source file.
if("${unity_file}" STREQUAL "")
set(unity_target_sources ${unity_target_sources} ${src})
endif()
endforeach()
set(unity_target_${TYPE}_sources
${unity_target_sources}
PARENT_SCOPE)
endfunction()
# Materialise the unity files of the current directory's unity target.
#
#   finish_unity_target(<cc|cu>)
#
# For every registered group, the lines collected in the
# `<target>_<TYPE>_file_<index>_sources` GLOBAL property are joined with
# newlines and compared with the file already on disk. The file is rewritten
# only when the text differs, which keeps timestamps stable and protects
# incremental compilation speed.
function(finish_unity_target TYPE)
  # The target name is the directory below paddle/fluid with "/" turned into "_".
  string(REPLACE "${PADDLE_SOURCE_DIR}/paddle/fluid/" "" unity_name
                 ${CMAKE_CURRENT_SOURCE_DIR})
  string(REPLACE "/" "_" unity_name ${unity_name})
  set(unity_name "paddle_${unity_name}_unity")

  get_property(group_count GLOBAL PROPERTY ${unity_name}_${TYPE}_group_index)
  if("${group_count}" STREQUAL "")
    # No group was registered for this target/type: nothing to write.
    return()
  endif()

  # foreach(RANGE n) also yields n itself, so stop once the count is reached.
  foreach(group_no RANGE ${group_count})
    if(${group_no} GREATER_EQUAL ${group_count})
      break()
    endif()

    get_property(file_lines GLOBAL
                 PROPERTY ${unity_name}_${TYPE}_file_${group_no}_sources)
    string(JOIN "\n" wanted_text ${file_lines})

    set(group_file
        ${CMAKE_CURRENT_BINARY_DIR}/${unity_name}_${group_no}_${TYPE}.${TYPE})
    file(READ ${group_file} current_text)

    # Leave the file alone when it already holds the wanted text.
    if("${current_text}" STREQUAL "${wanted_text}")
      continue()
    endif()
    file(WRITE ${group_file} ${wanted_text})
  endforeach()
endfunction()
# Some common routine for paddle compile.
# target_circle_link_libraries
# Link libraries to target which has circle dependencies.
#
# First Argument: target name want to be linked with libraries
# Rest Arguments: libraries which link together.
#
# The markers ARCHIVE_START / ARCHIVE_END may appear in the library list.
# Libraries between them are linked as whole archives: `-Wl,-force_load` per
# library on Apple, a `-Wl,--whole-archive` ... `-Wl,--no-whole-archive` pair
# elsewhere.
function(target_circle_link_libraries TARGET_NAME)
  if(APPLE)
    set(LIBS)
    set(inArchive OFF)
    set(libsInArgn)
    foreach(arg ${ARGN})
      # Quote the expansion so a library whose name is also a CMake variable is
      # compared literally instead of being dereferenced by if().
      if("${arg}" STREQUAL "ARCHIVE_START")
        set(inArchive ON)
      elseif("${arg}" STREQUAL "ARCHIVE_END")
        set(inArchive OFF)
      else()
        if(inArchive)
          list(APPEND LIBS "-Wl,-force_load")
        endif()
        list(APPEND LIBS ${arg})
        list(APPEND libsInArgn ${arg})
      endif()
    endforeach()
    if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" OR "${CMAKE_CXX_COMPILER_ID}"
                                                      STREQUAL "AppleClang")
      if(NOT IOS_ENABLE_BITCODE)
        list(APPEND LIBS "-undefined dynamic_lookup")
      endif()
    endif()
    # The libraries are passed a second time in reverse order so that symbols
    # can be resolved in both directions of a circular dependency.
    list(REVERSE libsInArgn)
    target_link_libraries(${TARGET_NAME} ${LIBS} ${libsInArgn})
  else() # LINUX
    set(LIBS)
    foreach(arg ${ARGN})
      if("${arg}" STREQUAL "ARCHIVE_START")
        list(APPEND LIBS "-Wl,--whole-archive")
      elseif("${arg}" STREQUAL "ARCHIVE_END")
        list(APPEND LIBS "-Wl,--no-whole-archive")
      else()
        list(APPEND LIBS ${arg})
      endif()
    endforeach()
    # A linker group lets the listed libraries reference each other regardless
    # of their order on the command line.
    target_link_libraries(${TARGET_NAME} "-Wl,--start-group" ${LIBS}
                          "-Wl,--end-group")
  endif()
endfunction()
# Determine PADDLE_VERSION.
#
# The PADDLE_VERSION environment variable wins when it is set. Otherwise the
# version is derived from git:
#   * on a `release/<x.y.z>` branch, from the nearest tag of the form
#     v<x.y.z>[.(a|b|rc).<n>], walking back one commit past tags that do not
#     match; "0.0.0" when `git describe` only yields a commit hash;
#   * on any other branch, from a tag that points exactly at HEAD, else "0.0.0";
#   * "0.0.0" (with a warning) when a git command fails.
# Afterwards PADDLE_MAJOR_VER / PADDLE_MINOR_VER / PADDLE_PATCH_VER and
# PADDLE_VERSION_INTEGER are computed and exported as compile definitions.
set(PADDLE_VERSION $ENV{PADDLE_VERSION})
set(tmp_version "HEAD")
set(TAG_VERSION_REGEX "[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?")
set(COMMIT_VERSION_REGEX "[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+[0-9a-f]+")
while("${PADDLE_VERSION}" STREQUAL "")
  # Check current branch name
  execute_process(
    COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref ${tmp_version}
    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
    OUTPUT_VARIABLE GIT_BRANCH_NAME
    RESULT_VARIABLE GIT_BRANCH_RESULT
    ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
  # RESULT_VARIABLE is "0" on success, another number on failure, or an error
  # string when the command could not be started. Comparing the quoted value
  # handles all three without tripping if() on multi-word text.
  if("${GIT_BRANCH_RESULT}" STREQUAL "0")
    execute_process(
      COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 --always
              ${tmp_version}
      WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
      OUTPUT_VARIABLE GIT_TAG_NAME
      RESULT_VARIABLE GIT_RESULT
      ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
    if("${GIT_RESULT}" STREQUAL "0")
      # Check if current branch is release branch
      if("${GIT_BRANCH_NAME}" MATCHES "release/${TAG_VERSION_REGEX}")
        # Check the tag is a correct version
        if("${GIT_TAG_NAME}" MATCHES "${COMMIT_VERSION_REGEX}")
          # if no tag was found, set PADDLE_VERSION to 0.0.0 to represent latest
          set(PADDLE_VERSION "0.0.0")
        elseif("${GIT_TAG_NAME}" MATCHES "v${TAG_VERSION_REGEX}")
          string(REPLACE "v" "" PADDLE_VERSION "${GIT_TAG_NAME}")
        else() # otherwise, get the previous git tag name.
          set(tmp_version "${GIT_TAG_NAME}~1")
        endif()
      else()
        execute_process(
          COMMAND ${GIT_EXECUTABLE} describe --exact-match --tags ${tmp_version}
          WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
          OUTPUT_VARIABLE GIT_EXACT_TAG_NAME
          RESULT_VARIABLE GIT_EXACT_TAG_RESULT
          ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
        # Test the command's result, not the tag text: the tag name is empty
        # when no tag points at this commit.
        if("${GIT_EXACT_TAG_RESULT}" STREQUAL "0")
          # Check if current branch is tag branch
          if("${GIT_EXACT_TAG_NAME}" MATCHES "v${TAG_VERSION_REGEX}")
            string(REPLACE "v" "" PADDLE_VERSION "${GIT_EXACT_TAG_NAME}")
          else()
            set(PADDLE_VERSION "0.0.0")
          endif()
        else()
          # otherwise, we always set PADDLE_VERSION to 0.0.0 to represent latest
          set(PADDLE_VERSION "0.0.0")
        endif()
      endif()
    else()
      set(PADDLE_VERSION "0.0.0")
      message(WARNING "Cannot add paddle version from git tag")
    endif()
  else()
    set(PADDLE_VERSION "0.0.0")
    message(WARNING "Cannot add paddle version for wrong git branch result")
  endif()
endwhile()
# Split on "-" and "." so suffixed versions still yield major/minor/patch as
# the first three components.
string(REPLACE "-" "." PADDLE_VER_LIST "${PADDLE_VERSION}")
string(REPLACE "." ";" PADDLE_VER_LIST "${PADDLE_VER_LIST}")
list(LENGTH PADDLE_VER_LIST PADDLE_VER_LIST_LEN)
if(PADDLE_VER_LIST_LEN LESS 3)
  message(
    FATAL_ERROR
      "PADDLE_VERSION '${PADDLE_VERSION}' must have at least three components (major.minor.patch)"
  )
endif()
list(GET PADDLE_VER_LIST 0 PADDLE_MAJOR_VER)
list(GET PADDLE_VER_LIST 1 PADDLE_MINOR_VER)
list(GET PADDLE_VER_LIST 2 PADDLE_PATCH_VER)
math(
  EXPR
  PADDLE_VERSION_INTEGER
  "${PADDLE_MAJOR_VER} * 1000000 + ${PADDLE_MINOR_VER} * 1000 + ${PADDLE_PATCH_VER}"
)
add_definitions(-DPADDLE_VERSION=${PADDLE_VERSION})
add_definitions(-DPADDLE_VERSION_INTEGER=${PADDLE_VERSION_INTEGER})
message(STATUS "Paddle version is ${PADDLE_VERSION}")
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Everything below configures the XPU KP (kernel primitive) toolchain and is
# skipped entirely unless WITH_XPU_KP is enabled.
if(NOT WITH_XPU_KP)
return()
endif()
# Allow multiple definitions at link time. Note that these assignments replace
# (rather than append to) the current value of the two linker-flag variables.
set(LINK_FLAGS "-Wl,--allow-multiple-definition")
set(CMAKE_EXE_LINKER_FLAGS "${LINK_FLAGS}")
set(CMAKE_SHARED_LINKER_FLAGS "${LINK_FLAGS}")
# XPU_TOOLCHAIN: root of the XTDK installation. When not provided, a default
# path is used and resolved to its real path.
if(NOT XPU_TOOLCHAIN)
set(XPU_TOOLCHAIN /workspace/output/XTDK-ubuntu_x86_64)
get_filename_component(XPU_TOOLCHAIN ${XPU_TOOLCHAIN} REALPATH)
endif()
if(NOT IS_DIRECTORY ${XPU_TOOLCHAIN})
message(FATAL_ERROR "Directory ${XPU_TOOLCHAIN} not found!")
endif()
message(STATUS "Build with XPU_TOOLCHAIN=" ${XPU_TOOLCHAIN})
# The XPU compiler driver is the clang++ shipped inside the toolchain.
set(XPU_CLANG ${XPU_TOOLCHAIN}/bin/clang++)
message(STATUS "Build with XPU_CLANG=" ${XPU_CLANG})
# The host sysroot of XPU compiler is gcc-8.2
if(NOT HOST_SYSROOT)
set(HOST_SYSROOT /opt/compiler/gcc-8.2)
endif()
if(NOT IS_DIRECTORY ${HOST_SYSROOT})
message(FATAL_ERROR "Directory ${HOST_SYSROOT} not found!")
endif()
# API_ARCH: target triple of the host side; defaults to an x86_64 triple.
if(NOT API_ARCH)
set(API_ARCH x86_64-baidu-linux-gnu)
endif()
# Pick the host compiler and archiver: for x86_64 prefer the tools inside
# HOST_SYSROOT and fall back to /usr/bin; for any other architecture reuse
# CMake's configured compiler and archiver.
if(API_ARCH MATCHES "x86_64")
if(EXISTS ${HOST_SYSROOT}/bin/g++)
set(HOST_CXX ${HOST_SYSROOT}/bin/g++)
set(HOST_AR ${HOST_SYSROOT}/bin/ar)
else()
set(HOST_CXX /usr/bin/g++)
set(HOST_AR /usr/bin/ar)
endif()
else()
set(HOST_CXX ${CMAKE_CXX_COMPILER})
set(HOST_AR ${CMAKE_AR})
endif()
# TOOLCHAIN_ARGS is left empty here.
set(TOOLCHAIN_ARGS)
# OPT_LEVEL: optimisation flag passed to the XPU compiler; defaults to -O3.
# The first branch re-assigns the variable to its own value.
if(OPT_LEVEL)
set(OPT_LEVEL ${OPT_LEVEL})
else()
set(OPT_LEVEL "-O3")
endif()
# Report the effective configuration.
message(STATUS "Build with API_ARCH=" ${API_ARCH})
message(STATUS "Build with TOOLCHAIN_ARGS=" ${TOOLCHAIN_ARGS})
message(STATUS "Build with HOST_SYSROOT=" ${HOST_SYSROOT})
message(STATUS "Build with HOST_CXX=" ${HOST_CXX})
message(STATUS "Build with HOST_AR=" ${HOST_AR})
# compile_kernel(<first-arg> DIRPATH <dir> XNAME <name> [DEVICE <flags>]
#                [HOST <flags>] [XPU <tag>] [DEPENDS <targets>...])
#
# Sets up the build rules for one XPU kernel source `<dir>/<name>.kps`:
#   * target `<name>.xpu` copies the .kps file to kernel_build/<name>.xpu;
#   * kernel_build/<name>.bin.o is produced by a device-only compile followed
#     by xpu2-elfconv;
#   * kernel_build/<name>.host.o is produced by a host-only compile;
#   * target `<name>_kernel` depends on both object files.
# Both object paths are appended to `xpu_kernel_depends`.
#
# This is a macro, so every variable set here lives in the caller's scope. The
# first positional argument is bound to COMPILE_ARGS, which is not referenced
# in the body. Keyword arguments are parsed with the prefix `xpu_add_library`,
# the same prefix the xpu_add_library macro uses.
macro(compile_kernel COMPILE_ARGS)
set(options "")
set(oneValueArgs "")
set(multiValueArgs
KERNEL
DIRPATH
XNAME
DEVICE
HOST
XPU
DEPENDS)
cmake_parse_arguments(xpu_add_library "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
set(kernel_path ${xpu_add_library_DIRPATH})
set(kernel_name ${xpu_add_library_XNAME})
set(device_o_extra_flags ${xpu_add_library_DEVICE})
set(host_o_extra_flags ${xpu_add_library_HOST})
set(xpu_1_or_2 ${xpu_add_library_XPU})
set(cc_depends ${xpu_add_library_DEPENDS})
set(kernel_target ${kernel_name}_kernel)
# Target that is up to date once both object files of this kernel exist.
add_custom_target(
${kernel_target}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS kernel_build/${kernel_name}.host.o kernel_build/${kernel_name}.bin.o
COMMENT ${kernel_target}
VERBATIM)
if(cc_depends)
add_dependencies(${kernel_target} ${xpu_add_library_DEPENDS})
endif()
# The extra device/host flags are split into argument lists here. They, and
# xpu_1_or_2, are not referenced by the compile commands below, which
# hard-code -mcpu=xpu2.
set(arg_device_o_extra_flags ${device_o_extra_flags})
separate_arguments(arg_device_o_extra_flags)
set(arg_host_o_extra_flags ${host_o_extra_flags})
separate_arguments(arg_host_o_extra_flags)
set(XTDK_DIR ${XPU_TOOLCHAIN})
set(CXX_DIR ${HOST_SYSROOT})
# Warning and code-generation flags passed to both compile steps.
set(XPU_CXX_FLAGS
-fforce-enable-int128
-Wno-error=pessimizing-move
-Wno-error=constant-conversion
-Wno-error=c++11-narrowing
-Wno-error=shift-count-overflow
-Wno-error=unused-local-typedef
-Wno-error=deprecated-declarations
-Wno-deprecated-declarations
-std=c++14
-m64
-fPIC
-fno-omit-frame-pointer
-Wall
-Wno-inconsistent-missing-override
-Wextra
-Wnon-virtual-dtor
-Wdelete-non-virtual-dtor
-Wno-unused-parameter
-Wno-unused-function
-Wno-error=unused-local-typedefs
-Wno-error=ignored-attributes
-Wno-error=int-in-bool-context
-Wno-error=parentheses
-Wno-error=address
-Wno-ignored-qualifiers
-Wno-ignored-attributes
-Wno-parentheses
-DNDEBUG)
# Include paths: turn the INCLUDE_DIRECTORIES property of the top-level source
# directory into a list of -I<dir> arguments.
get_property(
dirs
DIRECTORY ${CMAKE_SOURCE_DIR}
PROPERTY INCLUDE_DIRECTORIES)
set(XPU_CXX_INCLUDES "")
foreach(dir IN LISTS dirs)
list(APPEND XPU_CXX_INCLUDES "-I${dir}")
endforeach()
string(REPLACE ";" " " XPU_CXX_INCLUDES "${XPU_CXX_INCLUDES}")
separate_arguments(XPU_CXX_INCLUDES UNIX_COMMAND "${XPU_CXX_INCLUDES}")
# Definitions: turn the COMPILE_DEFINITIONS of the top-level source directory
# into a list of -D<def> arguments.
get_directory_property(DirDefs DIRECTORY ${CMAKE_SOURCE_DIR}
COMPILE_DEFINITIONS)
set(XPU_CXX_DEFINES "")
foreach(def IN LISTS DirDefs)
list(APPEND XPU_CXX_DEFINES "-D${def}")
endforeach()
string(REPLACE ";" " " XPU_CXX_DEFINES "${XPU_CXX_DEFINES}")
separate_arguments(XPU_CXX_DEFINES UNIX_COMMAND "${XPU_CXX_DEFINES}")
# libstdc++ ABI selection: the old ABI is used only when both WITH_HETERPS and
# WITH_PSLIB are enabled.
set(ABI_VERSION "")
if(WITH_HETERPS AND WITH_PSLIB)
set(ABI_VERSION "-D_GLIBCXX_USE_CXX11_ABI=0")
else()
set(ABI_VERSION "-D_GLIBCXX_USE_CXX11_ABI=1")
endif()
# Copy <name>.kps to kernel_build/<name>.xpu, the file the compiler reads.
# NOTE(review): the compile commands below read this copy, but their DEPENDS
# lists only name xpu_add_library_DEPENDS -- confirm the ordering against this
# target is guaranteed elsewhere.
add_custom_target(
${kernel_name}.xpu ALL
COMMAND ${CMAKE_COMMAND} -E copy ${kernel_path}/${kernel_name}.kps
kernel_build/${kernel_name}.xpu)
# Device object: device-only compile into <name>.bin.o.sec, then xpu2-elfconv
# produces <name>.bin.o from it.
add_custom_command(
OUTPUT kernel_build/${kernel_name}.bin.o
COMMAND ${CMAKE_COMMAND} -E make_directory kernel_build
COMMAND
${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 ${ABI_VERSION} ${OPT_LEVEL}
-fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS}
${XPU_CXX_INCLUDES} -I. -o kernel_build/${kernel_name}.bin.o.sec
kernel_build/${kernel_name}.xpu --xpu-device-only -c -v
COMMAND ${XTDK_DIR}/bin/xpu2-elfconv kernel_build/${kernel_name}.bin.o.sec
kernel_build/${kernel_name}.bin.o ${XPU_CLANG} --sysroot=${CXX_DIR}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${xpu_add_library_DEPENDS}
COMMENT kernel_build/${kernel_name}.bin.o
VERBATIM)
list(APPEND xpu_kernel_depends kernel_build/${kernel_name}.bin.o)
# Host object: host-only compile of the same source into <name>.host.o.
add_custom_command(
OUTPUT kernel_build/${kernel_name}.host.o
COMMAND ${CMAKE_COMMAND} -E make_directory kernel_build
COMMAND
${XPU_CLANG} --sysroot=${CXX_DIR} -std=c++11 ${ABI_VERSION} ${OPT_LEVEL}
-fno-builtin -mcpu=xpu2 -fPIC ${XPU_CXX_DEFINES} ${XPU_CXX_FLAGS}
${XPU_CXX_INCLUDES} -I. -o kernel_build/${kernel_name}.host.o
kernel_build/${kernel_name}.xpu --xpu-host-only -c -v
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
DEPENDS ${xpu_add_library_DEPENDS}
COMMENT kernel_build/${kernel_name}.host.o
VERBATIM)
list(APPEND xpu_kernel_depends kernel_build/${kernel_name}.host.o)
endmacro()
###############################################################################
# XPU_ADD_LIBRARY
#
#   xpu_add_library(<target> STATIC <source-glob>... [DEPENDS <target>...])
#
# Builds the static library <target> from the files matched by the STATIC glob
# expressions. Files with the extension `.kps` are XPU kernels: each one is
# handed to compile_kernel(), and the resulting objects are archived into
# lib<target>_xpu.a, which <target> links against. All other matched files are
# compiled as ordinary sources of <target>.
#
# This is a macro, so the variables it sets are visible in the caller's scope.
###############################################################################
macro(xpu_add_library TARGET_NAME)
  # Separate the sources from the options
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs STATIC DEPENDS)
  cmake_parse_arguments(xpu_add_library "${options}" "${oneValueArgs}"
                        "${multiValueArgs}" ${ARGN})
  set(xpu_srcs ${xpu_add_library_STATIC})
  set(xpu_target ${TARGET_NAME})
  set(cc_srcs_depends ${xpu_add_library_DEPENDS})

  # A macro shares its caller's scope. Start from empty accumulators so that a
  # second invocation in the same scope does not inherit the previous call's
  # kernels, sources and object files.
  set(xpu_kernel_lists)
  set(cc_kernel_lists)
  set(xpu_kernel_depends)

  file(GLOB_RECURSE xpu_srcs_lists ${xpu_srcs})
  list(LENGTH xpu_srcs_lists xpu_srcs_lists_num)
  set(XPU1_DEVICE_O_EXTRA_FLAGS " ")
  set(XPU1_HOST_O_EXTRA_FLAGS " ")

  # Distinguish .kps kernel files from other files
  foreach(cur_xpu_src IN LISTS xpu_srcs_lists)
    get_filename_component(language_type_name ${cur_xpu_src} EXT)
    # Quoted so that a file without any extension compares as "" instead of
    # producing a malformed if().
    if("${language_type_name}" STREQUAL ".kps")
      list(APPEND xpu_kernel_lists ${cur_xpu_src})
    else()
      list(APPEND cc_kernel_lists ${cur_xpu_src})
    endif()
  endforeach()

  list(LENGTH xpu_kernel_lists xpu_kernel_lists_num)
  list(LENGTH cc_srcs_depends cc_srcs_depends_num)
  if(xpu_kernel_lists_num GREATER 0)
    # Set up the device/host compile rules for every kernel found.
    foreach(xpu_kernel IN LISTS xpu_kernel_lists)
      get_filename_component(kernel_name ${xpu_kernel} NAME_WE)
      get_filename_component(kernel_dir ${xpu_kernel} DIRECTORY)
      set(kernel_rules ${kernel_dir}/${kernel_name}.rules)
      compile_kernel(
        KERNEL
        ${xpu_kernel}
        DIRPATH
        ${kernel_dir}
        XNAME
        ${kernel_name}
        DEVICE
        ${XPU1_DEVICE_O_EXTRA_FLAGS}
        HOST
        ${XPU1_HOST_O_EXTRA_FLAGS}
        XPU
        "xpu2"
        DEPENDS
        ${cc_srcs_depends})
    endforeach()

    # <target>_src drives the kernel objects and the archive built from them.
    add_custom_target(
      ${xpu_target}_src ALL
      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
      DEPENDS ${xpu_kernel_depends}
              ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a
      COMMENT ${xpu_target}_src
      VERBATIM)
    # Archive all kernel objects into lib<target>_xpu.a with the host archiver.
    add_custom_command(
      OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a
      COMMAND ${HOST_AR} rcs ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a
              ${xpu_kernel_depends}
      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
      DEPENDS ${xpu_kernel_depends}
      COMMENT ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a
      VERBATIM)
    add_library(${xpu_target} STATIC ${cc_kernel_lists})
    # add_dependencies orders the build; the archive itself is linked below.
    add_dependencies(${xpu_target} ${xpu_target}_src)
    target_link_libraries(${TARGET_NAME}
                          ${CMAKE_CURRENT_BINARY_DIR}/lib${xpu_target}_xpu.a)
  else()
    # No kernels matched: a plain static library of the remaining sources.
    add_library(${xpu_target} STATIC ${cc_kernel_lists})
  endif()
endmacro()
# For Readers and Developers
Thanks for reading PaddlePaddle documentation.
Since **September 17th, 2018**, the **0.15.0 and develop** documentation source has been moved to [FluidDoc Repo](https://github.com/PaddlePaddle/FluidDoc) and updated there.
Please turn to FluidDoc Repo for the latest documentation.
#!/bin/bash
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# PORT_FILE records the ports currently reserved by test runs;
# PORT_LOCK_FILE is the file that acquire_ports/release_ports lock with flock.
PORT_FILE=/tmp/paddle_test_ports
PORT_LOCK_FILE=/tmp/paddle_test_ports.lock
# Create both files and make them readable/writable by every user. Errors are
# deliberately discarded (e.g. when the files belong to another user).
touch $PORT_FILE $PORT_LOCK_FILE 2>/dev/null
chmod a+rw $PORT_FILE $PORT_LOCK_FILE 2>/dev/null
# Reserve a range of ports that is not currently reserved by another
# runtests.sh.
# Returns 1 if any port in the range is already recorded in $PORT_FILE,
# otherwise records the whole range and returns 0.
# NOTE: acquire_ports/release_ports are mutually exclusive across processes
# (exclusive flock on $PORT_LOCK_FILE).
#
# There are two parameters:
# param 1: the beginning of the port range
# param 2: the length of the port range
# so the port range is [param1, param1+param2)
acquire_ports(){
  (
    flock -x 200
    let "len=$1+$2"
    for((i=$1; i<len; i++))
    do
      # -w matches the port as a whole word, so reserving 1000 is not blocked
      # by a recorded 10001, whether entries are separated by newlines or
      # spaces.
      if grep -qw -- "$i" "$PORT_FILE"; then
        exit 1 # Port already written to $PORT_FILE; leaves only this subshell
      fi
    done

    for((i=$1; i<len; i++))
    do
      echo "$i" >> "$PORT_FILE" # Record the port in $PORT_FILE
    done
    exit 0
  )200>"$PORT_LOCK_FILE"
}
# Release a range of ports, i.e. mark them as no longer used by runtests.sh.
# NOTE: acquire_ports/release_ports are mutually exclusive across processes
# (exclusive flock on $PORT_LOCK_FILE).
#
# The parameters are the same as for acquire_ports: $1 is the first port and
# $2 the number of ports, so [$1, $1+$2) is released.
release_ports(){
  (
    flock -x 200
    let "len=$1+$2"
    # Normalise the record to one entry per line, then keep every entry whose
    # numeric value lies outside the released range. Matching whole entries
    # (not substrings) leaves other processes' reservations untouched.
    kept=$(tr -s '[:space:]' '\n' < "$PORT_FILE" |
           awk -v lo="$1" -v hi="$len" \
               '$0 != "" && !(($1 + 0) >= (lo + 0) && ($1 + 0) < (hi + 0))')
    if [ -n "$kept" ]; then
      printf '%s\n' "$kept" > "$PORT_FILE"
    else
      : > "$PORT_FILE" # nothing left: truncate the record
    fi
  )200>"$PORT_LOCK_FILE"
}
# Run a command with a randomly chosen free port appended as `--<name>=<port>`.
#
# Usage: set_port [-n <count>] [-p <name>] <command> [args...]
#   -p <name>   name of the option passed to the command (default: port),
#               e.g. `set_port -p port test_fuc` runs `test_fuc --port=<random>`
#   -n <count>  number of consecutive free ports required (default: 1),
#               e.g. `set_port -n 10 -p port test_fuc` reserves ten consecutive
#               ports and passes the first one as --port
#
# Returns 0 when the command succeeds and 1 when it fails. The command is run
# at most once: the loop only repeats while looking for a usable port range.
set_port()
{
num=1
port_type="port"
# Reset getopts state so the function can be called more than once per shell.
unset OPTIND
while getopts "n:p:" opt
do
case "$opt" in
n) echo "get num ${OPTARG}"
num=${OPTARG}
;;
p) echo "get port_type ${OPTARG}"
port_type=${OPTARG}
;;
esac
done
# Everything after the parsed options is the command to run.
# NOTE(review): the command is stored as a plain string and later expanded
# unquoted, so arguments containing whitespace are re-split.
shift $((OPTIND-1))
cmd=$@
# Up to 10000 attempts to find a free, unreserved range.
for ((i=1;i<=10000;i++))
do
# Candidate start port in [10000, 42767] ($RANDOM is 0..32767).
declare -i port=$RANDOM+10000
port_used_total=0
# Count netstat lines mentioning any port of the candidate range.
# NOTE(review): grep matches substrings, so unrelated lines containing the
# digits are counted too; this can only reject a range, never accept a busy one.
for((n=0;n<=num-1;n++))
do
declare -i port_check=$port+$n
port_used_num=`netstat -a |grep $port_check|wc -l`
declare -i port_used_total=$port_used_total+$port_used_num
done
if [ $port_used_total -ne 0 ]
then
continue
fi
# Lock Ports.
acquire_ports $port $num
if [ $? -ne 0 ]; then
continue
fi
# Run the command with the first port of the range, then release the
# reservation regardless of the outcome.
$cmd --$port_type=$port
return_val=$?
release_ports $port $num
if [ $return_val -eq 0 ]; then
return 0
else
echo "$cmd run wrong"
return 1
fi
done
}
.timestamp
*.o
*.a
.svn
GPATH
GRTAGS
GTAGS
.idl*
*~
*.pyc
*.pb.cc
*.pb.h
*_pb2.py
output/
google/
Makefile
log/
.pptool_config
hf/
build
issue.info
ar
g++
gcc
ld
ld-linux-x86-64.so.2
x86_64-scm-linux-gnu/
.lint.*.md5
.idea/
.test_env
Paddle_wrap.cxx
Paddle_wrap.h
paddle.py
py_paddle-*.whl
py_paddle/paddle.py
.py_paddle_extra_link_flags
HPPL_ERROR_LOG
unittest.list
proto
dist
setup.py
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment