Commit 992bec46 authored by “yuguo”'s avatar “yuguo”
Browse files

2.5

parent 0259837d
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
if(NOT WITH_XPU) if(NOT WITH_XPU)
return() return()
endif() endif()
...@@ -6,83 +20,94 @@ include(ExternalProject) ...@@ -6,83 +20,94 @@ include(ExternalProject)
set(XPU_PROJECT "extern_xpu") set(XPU_PROJECT "extern_xpu")
set(XPU_API_LIB_NAME "libxpuapi.so") set(XPU_API_LIB_NAME "libxpuapi.so")
set(XPU_RT_LIB_NAME "libxpurt.so") set(XPU_RT_LIB_NAME "libxpurt.so")
set(XPU_XFT_LIB_NAME "libxft.so")
set(XPU_XPTI_LIB_NAME "libxpti.so")
if(NOT DEFINED XPU_BASE_DATE)
set(XPU_BASE_DATE "20230810")
endif()
set(XPU_XCCL_BASE_VERSION "1.0.53.6")
if(NOT DEFINED XPU_XFT_BASE_VERSION)
set(XPU_XFT_BASE_VERSION "20230602")
endif()
set(XPU_XPTI_BASE_VERSION "0.0.1")
if(NOT DEFINED XPU_BASE_URL) if(NOT DEFINED XPU_BASE_URL)
set(XPU_BASE_URL_WITHOUT_DATE set(XPU_BASE_URL_WITHOUT_DATE
"https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev") "https://baidu-kunlun-product.su.bcebos.com/KL-SDK/klsdk-dev")
set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/20220919") set(XPU_BASE_URL "${XPU_BASE_URL_WITHOUT_DATE}/${XPU_BASE_DATE}")
else() else()
set(XPU_BASE_URL "${XPU_BASE_URL}") set(XPU_BASE_URL "${XPU_BASE_URL}")
endif() endif()
# ubuntu and centos: use output by XDNN API team set(XPU_XCCL_BASE_URL
if(NOT DEFINED XPU_XDNN_BASE_URL) "https://klx-sdk-release-public.su.bcebos.com/xccl/release/${XPU_XCCL_BASE_VERSION}"
set(XPU_XDNN_BASE_URL_WITHOUT_DATE )
"https://klx-sdk-release-public.su.bcebos.com/xdnn/dev")
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL_WITHOUT_DATE}/20220919") if(NOT XPU_XFT_BASE_URL)
else() set(XPU_XFT_BASE_URL
set(XPU_XDNN_BASE_URL "${XPU_XDNN_BASE_URL}") "https://klx-sdk-release-public.su.bcebos.com/xft/dev/${XPU_XFT_BASE_VERSION}"
)
endif() endif()
set(XPU_XCCL_BASE_URL set(XPU_XPTI_BASE_URL
"https://klx-sdk-release-public.su.bcebos.com/xccl/release/1.0.0") "https://klx-sdk-release-public.su.bcebos.com/xpti/dev/${XPU_XPTI_BASE_VERSION}"
)
if(WITH_XCCL_RDMA)
set(XPU_XCCL_PREFIX "xccl_rdma")
else()
set(XPU_XCCL_PREFIX "xccl_socket")
endif()
if(WITH_AARCH64) if(WITH_AARCH64)
set(XPU_XRE_DIR_NAME "xre-kylin_aarch64") set(XPU_XRE_DIR_NAME "xre-kylin_aarch64")
set(XPU_XDNN_DIR_NAME "xdnn-kylin_aarch64") set(XPU_XDNN_DIR_NAME "xdnn-kylin_aarch64")
set(XPU_XCCL_DIR_NAME "xccl-kylin_aarch64") set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-kylin_aarch64")
set(XPU_XDNN_URL set(XPU_XFT_DIR_NAME "") # TODO: xft has no kylin output at now.
"${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
elseif(WITH_SUNWAY) elseif(WITH_SUNWAY)
set(XPU_XRE_DIR_NAME "xre-deepin_sw6_64") set(XPU_XRE_DIR_NAME "xre-deepin_sw6_64")
set(XPU_XDNN_DIR_NAME "xdnn-deepin_sw6_64") set(XPU_XDNN_DIR_NAME "xdnn-deepin_sw6_64")
set(XPU_XCCL_DIR_NAME "xccl-deepin_sw6_64") set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-deepin_sw6_64")
set(XPU_XDNN_URL set(XPU_XFT_DIR_NAME "") # TODO: xft has no deepin output at now.
"${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
elseif(WITH_BDCENTOS) elseif(WITH_BDCENTOS)
set(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64") set(XPU_XRE_DIR_NAME "xre-bdcentos_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64") set(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64")
set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64") set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-bdcentos_x86_64")
# ubuntu and centos: use output by XDNN API team set(XPU_XFT_DIR_NAME "xft_bdcentos6u3_x86_64_gcc82")
set(XPU_XDNN_URL
"${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
elseif(WITH_UBUNTU) elseif(WITH_UBUNTU)
set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64") set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64") set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
set(XPU_XCCL_DIR_NAME "xccl-ubuntu_x86_64") set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-ubuntu_x86_64")
# ubuntu and centos: use output by XDNN API team set(XPU_XFT_DIR_NAME "xft_ubuntu1604_x86_64")
set(XPU_XDNN_URL
"${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
elseif(WITH_CENTOS) elseif(WITH_CENTOS)
set(XPU_XRE_DIR_NAME "xre-centos7_x86_64") set(XPU_XRE_DIR_NAME "xre-centos7_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-bdcentos_x86_64") set(XPU_XDNN_DIR_NAME "xdnn-centos7_x86_64")
set(XPU_XCCL_DIR_NAME "xccl-bdcentos_x86_64") set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-bdcentos_x86_64")
# ubuntu and centos: use output by XDNN API team set(XPU_XFT_DIR_NAME "xft_bdcentos6u3_x86_64_gcc82")
set(XPU_XDNN_URL
"${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
else() else()
set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64") set(XPU_XRE_DIR_NAME "xre-ubuntu_x86_64")
set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64") set(XPU_XDNN_DIR_NAME "xdnn-ubuntu_x86_64")
set(XPU_XCCL_DIR_NAME "xccl-ubuntu_x86_64") set(XPU_XCCL_DIR_NAME "${XPU_XCCL_PREFIX}-ubuntu_x86_64")
# default: use output by XDNN API team set(XPU_XFT_DIR_NAME "xft_ubuntu1604_x86_64")
set(XPU_XDNN_URL
"${XPU_XDNN_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
endif() endif()
set(XPU_XPTI_DIR_NAME "xpti")
set(XPU_XRE_URL set(XPU_XRE_URL
"${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz" "${XPU_BASE_URL}/${XPU_XRE_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE) CACHE STRING "" FORCE)
set(XPU_XDNN_URL
"${XPU_BASE_URL}/${XPU_XDNN_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
set(XPU_XCCL_URL set(XPU_XCCL_URL
"${XPU_XCCL_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz" "${XPU_XCCL_BASE_URL}/${XPU_XCCL_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE) CACHE STRING "" FORCE)
set(XPU_PACK_DEPENCE_URL set(XPU_XFT_URL "${XPU_XFT_BASE_URL}/${XPU_XFT_DIR_NAME}.tar.gz")
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/pack_paddle_depence.sh" set(XPU_XPTI_URL
"${XPU_XPTI_BASE_URL}/${XPU_XPTI_DIR_NAME}.tar.gz"
CACHE STRING "" FORCE)
set(XPU_XFT_GET_DEPENCE_URL
"https://baidu-kunlun-public.su.bcebos.com/paddle_depence/get_xft_dependence.sh"
CACHE STRING "" FORCE) CACHE STRING "" FORCE)
set(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu") set(SNAPPY_PREFIX_DIR "${THIRD_PARTY_PATH}/xpu")
...@@ -102,21 +127,38 @@ file( ...@@ -102,21 +127,38 @@ file(
"install(DIRECTORY xpu/include xpu/lib \n" "install(DIRECTORY xpu/include xpu/lib \n"
" DESTINATION ${XPU_INSTALL_DIR})\n") " DESTINATION ${XPU_INSTALL_DIR})\n")
if(WITH_XPU_BKCL)
message(STATUS "Compile with XPU BKCL!")
add_definitions(-DPADDLE_WITH_XPU_BKCL)
set(XPU_BKCL_LIB_NAME "libbkcl.so")
set(XPU_BKCL_LIB "${XPU_LIB_DIR}/${XPU_BKCL_LIB_NAME}")
set(XPU_BKCL_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include")
include_directories(${XPU_BKCL_INC_DIR})
endif()
ExternalProject_Add( ExternalProject_Add(
${XPU_PROJECT} ${XPU_PROJECT}
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
PREFIX ${SNAPPY_PREFIX_DIR} PREFIX ${SNAPPY_PREFIX_DIR}
DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR} DOWNLOAD_DIR ${XPU_DOWNLOAD_DIR}
DOWNLOAD_COMMAND DOWNLOAD_COMMAND
wget ${XPU_PACK_DEPENCE_URL} && bash pack_paddle_depence.sh ${XPU_XRE_URL} bash ${CMAKE_SOURCE_DIR}/tools/xpu/check_xpu_dependence.sh ${XPU_BASE_URL}
${XPU_XCCL_BASE_URL} && bash
${CMAKE_SOURCE_DIR}/tools/xpu/pack_paddle_depence.sh ${XPU_XRE_URL}
${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL} ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL} ${XPU_XRE_DIR_NAME} ${XPU_XDNN_URL} ${XPU_XDNN_DIR_NAME} ${XPU_XCCL_URL}
${XPU_XCCL_DIR_NAME} ${XPU_XCCL_DIR_NAME} && wget ${XPU_XFT_GET_DEPENCE_URL} && bash
get_xft_dependence.sh ${XPU_XFT_URL} ${XPU_XFT_DIR_NAME} &&
WITH_XPTI=${WITH_XPTI} bash
${CMAKE_SOURCE_DIR}/tools/xpu/get_xpti_dependence.sh ${XPU_XPTI_URL}
${XPU_XPTI_DIR_NAME}
DOWNLOAD_NO_PROGRESS 1 DOWNLOAD_NO_PROGRESS 1
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${XPU_INSTALL_ROOT}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT} CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${XPU_INSTALL_ROOT}
BUILD_BYPRODUCTS ${XPU_API_LIB} BUILD_BYPRODUCTS ${XPU_API_LIB}
BUILD_BYPRODUCTS ${XPU_RT_LIB}) BUILD_BYPRODUCTS ${XPU_RT_LIB}
BUILD_BYPRODUCTS ${XPU_BKCL_LIB})
include_directories(${XPU_INC_DIR}) include_directories(${XPU_INC_DIR})
add_library(shared_xpuapi SHARED IMPORTED GLOBAL) add_library(shared_xpuapi SHARED IMPORTED GLOBAL)
...@@ -128,19 +170,42 @@ generate_dummy_static_lib(LIB_NAME "xpulib" GENERATOR "xpu.cmake") ...@@ -128,19 +170,42 @@ generate_dummy_static_lib(LIB_NAME "xpulib" GENERATOR "xpu.cmake")
target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB}) target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
if(WITH_XPU_BKCL) if(WITH_XPU_XFT)
message(STATUS "Compile with XPU BKCL!") message(STATUS "Compile with XPU XFT!")
add_definitions(-DPADDLE_WITH_XPU_BKCL) add_definitions(-DPADDLE_WITH_XPU_XFT)
set(XPU_BKCL_LIB_NAME "libbkcl.so") set(XPU_XFT_INC_DIR "${XPU_INC_DIR}/xft")
set(XPU_BKCL_LIB "${XPU_LIB_DIR}/${XPU_BKCL_LIB_NAME}") include_directories(${XPU_XFT_INC_DIR})
set(XPU_BKCL_INC_DIR "${THIRD_PARTY_PATH}/install/xpu/include") set(XPU_XFT_LIB "${XPU_LIB_DIR}/${XPU_XFT_LIB_NAME}")
include_directories(${XPU_BKCL_INC_DIR}) endif()
if(WITH_XPTI)
message(STATUS "Compile with XPU XPTI!")
add_definitions(-DPADDLE_WITH_XPTI)
set(XPU_XPTI_LIB "${XPU_LIB_DIR}/${XPU_XPTI_LIB_NAME}")
endif()
if(WITH_XPU_PLUGIN)
message(STATUS "Compile with XPU PLUGIN!")
add_definitions(-DPADDLE_WITH_XPU_PLUGIN)
include_directories(${CMAKE_SOURCE_DIR}/paddle/phi/kernels/xpu/plugin/include)
endif()
if(WITH_XPU_BKCL AND WITH_XPU_XFT)
target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB}
${XPU_XFT_LIB})
elseif(WITH_XPU_BKCL)
target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB}) target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_BKCL_LIB})
elseif(WITH_XPU_XFT)
target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB} ${XPU_XFT_LIB})
else() else()
target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB}) target_link_libraries(xpulib ${XPU_API_LIB} ${XPU_RT_LIB})
endif() endif()
if(WITH_XPTI)
target_link_libraries(xpulib ${XPU_XPTI_LIB})
endif()
add_dependencies(xpulib ${XPU_PROJECT}) add_dependencies(xpulib ${XPU_PROJECT})
# Ensure that xpu/api.h can be included without dependency errors. # Ensure that xpu/api.h can be included without dependency errors.
......
...@@ -18,8 +18,8 @@ set(XXHASH_PREFIX_DIR ${THIRD_PARTY_PATH}/xxhash) ...@@ -18,8 +18,8 @@ set(XXHASH_PREFIX_DIR ${THIRD_PARTY_PATH}/xxhash)
set(XXHASH_SOURCE_DIR ${THIRD_PARTY_PATH}/xxhash/src/extern_xxhash) set(XXHASH_SOURCE_DIR ${THIRD_PARTY_PATH}/xxhash/src/extern_xxhash)
set(XXHASH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/xxhash) set(XXHASH_INSTALL_DIR ${THIRD_PARTY_PATH}/install/xxhash)
set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include") set(XXHASH_INCLUDE_DIR "${XXHASH_INSTALL_DIR}/include")
set(XXHASH_REPOSITORY ${GIT_URL}/Cyan4973/xxHash.git)
set(XXHASH_TAG v0.6.5) set(XXHASH_TAG v0.6.5)
set(SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/xxhash)
include_directories(${XXHASH_INCLUDE_DIR}) include_directories(${XXHASH_INCLUDE_DIR})
...@@ -29,7 +29,7 @@ if(APPLE) ...@@ -29,7 +29,7 @@ if(APPLE)
-i -i
\"\" \"\"
"s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g"
${XXHASH_SOURCE_DIR}/Makefile ${SOURCE_DIR}/Makefile
&& &&
make make
lib) lib)
...@@ -38,7 +38,7 @@ elseif(UNIX) ...@@ -38,7 +38,7 @@ elseif(UNIX)
sed sed
-i -i
"s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g" "s/-Wstrict-prototypes -Wundef/-Wstrict-prototypes -Wundef -fPIC/g"
${XXHASH_SOURCE_DIR}/Makefile ${SOURCE_DIR}/Makefile
&& &&
make make
lib) lib)
...@@ -57,14 +57,13 @@ endif() ...@@ -57,14 +57,13 @@ endif()
if(WIN32) if(WIN32)
ExternalProject_Add( ExternalProject_Add(
extern_xxhash extern_xxhash
${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY ${XXHASH_REPOSITORY} SOURCE_DIR ${SOURCE_DIR}
GIT_TAG ${XXHASH_TAG}
PREFIX ${XXHASH_PREFIX_DIR} PREFIX ${XXHASH_PREFIX_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
PATCH_COMMAND "" PATCH_COMMAND ""
CONFIGURE_COMMAND CONFIGURE_COMMAND
${CMAKE_COMMAND} ${XXHASH_SOURCE_DIR}/cmake_unofficial ${CMAKE_COMMAND} ${SOURCE_DIR}/cmake_unofficial
-DCMAKE_INSTALL_PREFIX:PATH=${XXHASH_INSTALL_DIR} -DCMAKE_INSTALL_PREFIX:PATH=${XXHASH_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE} -DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF -DCMAKE_VERBOSE_MAKEFILE:BOOL=OFF
...@@ -83,8 +82,7 @@ else() ...@@ -83,8 +82,7 @@ else()
ExternalProject_Add( ExternalProject_Add(
extern_xxhash extern_xxhash
${EXTERNAL_PROJECT_LOG_ARGS} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY ${XXHASH_REPOSITORY} SOURCE_DIR ${SOURCE_DIR}
GIT_TAG ${XXHASH_TAG}
PREFIX ${XXHASH_PREFIX_DIR} PREFIX ${XXHASH_PREFIX_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CONFIGURE_COMMAND "" CONFIGURE_COMMAND ""
......
...@@ -22,9 +22,8 @@ set(ZLIB_ROOT ...@@ -22,9 +22,8 @@ set(ZLIB_ROOT
set(ZLIB_INCLUDE_DIR set(ZLIB_INCLUDE_DIR
"${ZLIB_INSTALL_DIR}/include" "${ZLIB_INSTALL_DIR}/include"
CACHE PATH "zlib include directory." FORCE) CACHE PATH "zlib include directory." FORCE)
set(ZLIB_REPOSITORY ${GIT_URL}/madler/zlib.git)
set(ZLIB_TAG v1.2.8) set(ZLIB_TAG v1.2.8)
set(SOURCE_DIR ${PADDLE_SOURCE_DIR}/third_party/zlib)
include_directories(${ZLIB_INCLUDE_DIR} include_directories(${ZLIB_INCLUDE_DIR}
)# For zlib code to include its own headers. )# For zlib code to include its own headers.
include_directories(${THIRD_PARTY_PATH}/install include_directories(${THIRD_PARTY_PATH}/install
...@@ -42,9 +41,8 @@ endif() ...@@ -42,9 +41,8 @@ endif()
ExternalProject_Add( ExternalProject_Add(
extern_zlib extern_zlib
${EXTERNAL_PROJECT_LOG_ARGS} ${SHALLOW_CLONE} ${EXTERNAL_PROJECT_LOG_ARGS}
GIT_REPOSITORY ${ZLIB_REPOSITORY} SOURCE_DIR ${SOURCE_DIR}
GIT_TAG ${ZLIB_TAG}
PREFIX ${ZLIB_PREFIX_DIR} PREFIX ${ZLIB_PREFIX_DIR}
UPDATE_COMMAND "" UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} CMAKE_ARGS -DCMAKE_C_COMPILER=${CMAKE_C_COMPILER}
......
...@@ -35,9 +35,9 @@ endfunction() ...@@ -35,9 +35,9 @@ endfunction()
checkcompilercxx14flag() checkcompilercxx14flag()
if(NOT WIN32) if(NOT WIN32)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
else() else()
set(CMAKE_CXX_STANDARD 14) set(CMAKE_CXX_STANDARD 17)
endif() endif()
# safe_set_flag # safe_set_flag
...@@ -144,17 +144,12 @@ if(NOT WIN32) ...@@ -144,17 +144,12 @@ if(NOT WIN32)
-Werror -Werror
-Wall -Wall
-Wextra -Wextra
-Wnon-virtual-dtor
-Wdelete-non-virtual-dtor
-Wno-unused-parameter -Wno-unused-parameter
-Wno-unused-function -Wno-unused-function
-Wno-error=literal-suffix -Wno-error=array-bounds #Warning in Eigen, gcc 12.2
-Wno-error=unused-local-typedefs
-Wno-error=ignored-attributes # Warnings in Eigen, gcc 6.3 -Wno-error=ignored-attributes # Warnings in Eigen, gcc 6.3
-Wno-error=terminate # Warning in PADDLE_ENFORCE
-Wno-error=int-in-bool-context # Warning in Eigen gcc 7.2 -Wno-error=int-in-bool-context # Warning in Eigen gcc 7.2
-Wimplicit-fallthrough=0 # Warning in tinyformat.h -Wimplicit-fallthrough=0 # Warning in tinyformat.h
-Wno-error=maybe-uninitialized # Warning in boost gcc 7.2
${fsanitize}) ${fsanitize})
if(WITH_IPU) if(WITH_IPU)
...@@ -163,20 +158,15 @@ if(NOT WIN32) ...@@ -163,20 +158,15 @@ if(NOT WIN32)
) )
endif() endif()
if(WITH_ASCEND_CL AND WITH_ARM_BRPC) if(WITH_CUDNN_FRONTEND)
set(COMMON_FLAGS ${COMMON_FLAGS} -faligned-new) # flags from https://github.com/NVIDIA/cudnn-frontend/blob/v0.7.1/CMakeLists.txt
set(COMMON_FLAGS ${COMMON_FLAGS} -Wno-sign-compare -Wno-non-virtual-dtor)
endif() endif()
if(NOT APPLE) if(NOT APPLE)
if((${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.0) OR (WITH_ROCM)) if((${CMAKE_CXX_COMPILER_VERSION} VERSION_GREATER 8.0) OR (WITH_ROCM))
set(COMMON_FLAGS set(COMMON_FLAGS
${COMMON_FLAGS} ${COMMON_FLAGS} -Wno-ignored-qualifiers # Warning in Paddle-Lite
-Wno-format-truncation # Warning in boost gcc 8.2
-Wno-error=parentheses # Warning in boost gcc 8.2
-Wno-error=catch-value # Warning in boost gcc 8.2
-Wno-error=nonnull-compare # Warning in boost gcc 8.2
-Wno-error=address # Warning in boost gcc 8.2
-Wno-ignored-qualifiers # Warning in boost gcc 8.2
-Wno-ignored-attributes # Warning in Eigen gcc 8.3 -Wno-ignored-attributes # Warning in Eigen gcc 8.3
-Wno-parentheses # Warning in Eigen gcc 8.3 -Wno-parentheses # Warning in Eigen gcc 8.3
) )
...@@ -198,7 +188,8 @@ if(NOT WIN32) ...@@ -198,7 +188,8 @@ if(NOT WIN32)
if(NOT WITH_NV_JETSON if(NOT WITH_NV_JETSON
AND NOT WITH_ARM AND NOT WITH_ARM
AND NOT WITH_SW AND NOT WITH_SW
AND NOT WITH_MIPS) AND NOT WITH_MIPS
AND NOT WITH_LOONGARCH)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -m64")
endif() endif()
endif() endif()
...@@ -214,7 +205,16 @@ if(APPLE) ...@@ -214,7 +205,16 @@ if(APPLE)
CACHE STRING "Build architectures for OSX" FORCE) CACHE STRING "Build architectures for OSX" FORCE)
endif() endif()
# On Mac OS X register class specifier is deprecated and will cause warning error on latest clang 10.0 # On Mac OS X register class specifier is deprecated and will cause warning error on latest clang 10.0
set(COMMON_FLAGS -Wno-deprecated-register) set(COMMON_FLAGS
-Wno-deprecated-register
-Werror=format
-Werror=inconsistent-missing-override
-Werror=braced-scalar-init
-Werror=uninitialized
-Werror=tautological-constant-out-of-range-compare
-Werror=literal-conversion
-Werror=pragma-pack
-Werror=c++17-extensions)
endif() endif()
if(WITH_HETERPS AND WITH_PSLIB) if(WITH_HETERPS AND WITH_PSLIB)
...@@ -242,7 +242,7 @@ if(WITH_GPU) ...@@ -242,7 +242,7 @@ if(WITH_GPU)
endif() endif()
if(WITH_ROCM) if(WITH_ROCM)
set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} ${SAFE_GPU_COMMON_FLAGS} --gpu-max-threads-per-block=1024") set(HIP_HIPCC_FLAGS "${HIP_HIPCC_FLAGS} ${SAFE_GPU_COMMON_FLAGS}")
endif() endif()
# Disable -Werror, otherwise the compile will fail for rocblas_gemm_ex # Disable -Werror, otherwise the compile will fail for rocblas_gemm_ex
...@@ -251,7 +251,9 @@ if(WITH_ROCM) ...@@ -251,7 +251,9 @@ if(WITH_ROCM)
string(REPLACE "-Werror" "-Wno-error" CMAKE_C_FLAGS ${CMAKE_C_FLAGS}) string(REPLACE "-Werror" "-Wno-error" CMAKE_C_FLAGS ${CMAKE_C_FLAGS})
endif() endif()
if(WITH_PSCORE OR WITH_PSLIB) if(WITH_PSCORE
OR WITH_PSLIB
OR WITH_TENSORRT)
string(REPLACE "-Wnon-virtual-dtor" "-Wno-non-virtual-dtor" CMAKE_CXX_FLAGS string(REPLACE "-Wnon-virtual-dtor" "-Wno-non-virtual-dtor" CMAKE_CXX_FLAGS
${CMAKE_CXX_FLAGS}) ${CMAKE_CXX_FLAGS})
string(REPLACE "-Wnon-virtual-dtor" "-Wno-non-virtual-dtor" CMAKE_C_FLAGS string(REPLACE "-Wnon-virtual-dtor" "-Wno-non-virtual-dtor" CMAKE_C_FLAGS
......
...@@ -96,7 +96,7 @@ if(NOT APPLE AND NOT WIN32) ...@@ -96,7 +96,7 @@ if(NOT APPLE AND NOT WIN32)
link_libraries(${CMAKE_THREAD_LIBS_INIT}) link_libraries(${CMAKE_THREAD_LIBS_INIT})
if(WITH_PSLIB OR WITH_DISTRIBUTE) if(WITH_PSLIB OR WITH_DISTRIBUTE)
set(CMAKE_CXX_LINK_EXECUTABLE set(CMAKE_CXX_LINK_EXECUTABLE
"${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt -lz -lssl") "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt -lz -lssl -lcrypto")
else() else()
set(CMAKE_CXX_LINK_EXECUTABLE set(CMAKE_CXX_LINK_EXECUTABLE
"${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt") "${CMAKE_CXX_LINK_EXECUTABLE} -pthread -ldl -lrt")
...@@ -195,6 +195,7 @@ function(create_dummy_static_lib TARGET_NAME) ...@@ -195,6 +195,7 @@ function(create_dummy_static_lib TARGET_NAME)
# the dummy target would be consisted of limit size libraries # the dummy target would be consisted of limit size libraries
set(limit ${merge_LIMIT}) set(limit ${merge_LIMIT})
list(LENGTH merge_LIBS libs_len) list(LENGTH merge_LIBS libs_len)
message("libs_len ${libs_len}")
foreach(lib ${merge_LIBS}) foreach(lib ${merge_LIBS})
list(APPEND merge_list ${lib}) list(APPEND merge_list ${lib})
list(LENGTH merge_list listlen) list(LENGTH merge_list listlen)
...@@ -363,20 +364,7 @@ function(cc_library TARGET_NAME) ...@@ -363,20 +364,7 @@ function(cc_library TARGET_NAME)
list(REMOVE_ITEM cc_library_DEPS warpctc) list(REMOVE_ITEM cc_library_DEPS warpctc)
add_dependencies(${TARGET_NAME} warpctc) add_dependencies(${TARGET_NAME} warpctc)
endif() endif()
# Only deps libmklml.so, not link
if("${cc_library_DEPS};" MATCHES "mklml;")
list(REMOVE_ITEM cc_library_DEPS mklml)
if(NOT "${TARGET_NAME}" MATCHES "dynload_mklml")
list(APPEND cc_library_DEPS dynload_mklml)
endif()
add_dependencies(${TARGET_NAME} mklml)
if(WIN32)
target_link_libraries(${TARGET_NAME} ${MKLML_IOMP_LIB})
else()
target_link_libraries(${TARGET_NAME}
"-L${MKLML_LIB_DIR} -liomp5 -Wl,--as-needed")
endif()
endif()
# remove link to python, see notes at: # remove link to python, see notes at:
# https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually # https://github.com/pybind/pybind11/blob/master/docs/compiling.rst#building-manually
if("${cc_library_DEPS};" MATCHES "python;") if("${cc_library_DEPS};" MATCHES "python;")
...@@ -456,25 +444,10 @@ function(cc_test_build TARGET_NAME) ...@@ -456,25 +444,10 @@ function(cc_test_build TARGET_NAME)
endif() endif()
endif() endif()
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries( target_link_libraries(${TARGET_NAME} ${cc_test_DEPS}
${TARGET_NAME} ${os_dependency_modules} paddle_gtest_main gtest glog)
${cc_test_DEPS} add_dependencies(${TARGET_NAME} ${cc_test_DEPS} paddle_gtest_main gtest
${os_dependency_modules} glog)
paddle_gtest_main
lod_tensor
memory
gtest
gflags
glog)
add_dependencies(
${TARGET_NAME}
${cc_test_DEPS}
paddle_gtest_main
lod_tensor
memory
gtest
gflags
glog)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
if(WITH_ROCM) if(WITH_ROCM)
target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB}) target_link_libraries(${TARGET_NAME} ${ROCM_HIPRTC_LIB})
...@@ -485,14 +458,17 @@ endfunction() ...@@ -485,14 +458,17 @@ endfunction()
function(cc_test_run TARGET_NAME) function(cc_test_run TARGET_NAME)
if(WITH_TESTING) if(WITH_TESTING)
set(oneValueArgs "") set(oneValueArgs DIR)
set(multiValueArgs COMMAND ARGS) set(multiValueArgs COMMAND ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}" cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN}) "${multiValueArgs}" ${ARGN})
if(cc_test_DIR STREQUAL "")
set(cc_test_DIR ${CMAKE_CURRENT_BINARY_DIR})
endif()
add_test( add_test(
NAME ${TARGET_NAME} NAME ${TARGET_NAME}
COMMAND ${cc_test_COMMAND} ${cc_test_ARGS} COMMAND ${cc_test_COMMAND} ${cc_test_ARGS}
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}) WORKING_DIRECTORY ${cc_test_DIR})
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT
FLAGS_cpu_deterministic=true) FLAGS_cpu_deterministic=true)
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT
...@@ -512,7 +488,57 @@ function(cc_test_run TARGET_NAME) ...@@ -512,7 +488,57 @@ function(cc_test_run TARGET_NAME)
endif() endif()
endfunction() endfunction()
set_property(GLOBAL PROPERTY TEST_SRCS "")
set_property(GLOBAL PROPERTY TEST_NAMES "")
function(cc_test TARGET_NAME) function(cc_test TARGET_NAME)
if(WITH_TESTING)
set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS)
cmake_parse_arguments(cc_test "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN})
if(WIN32)
# NOTE(zhiqiu): on windows platform, the symbols should be exported
# explicitly by __declspec(dllexport), however, there are serveral
# symbols not exported, and link error occurs.
# so, the tests are not built against dynamic libraries now.
cc_test_old(
${TARGET_NAME}
SRCS
${cc_test_SRCS}
DEPS
${cc_test_DEPS}
ARGS
${cc_test_ARGS})
else()
list(LENGTH cc_test_SRCS len)
# message("cc_test_SRCS ${cc_test_SRCS}")
# message("cc_test_ARGS ${cc_test_ARGS}")
if(${len} GREATER 1)
message(
SEND_ERROR
"The number source file of cc_test should be 1, but got ${len}, the source files are: ${cc_test_SRCS}"
)
endif()
list(LENGTH cc_test_ARGS len_arg)
if(len_arg GREATER_EQUAL 1)
set_property(GLOBAL PROPERTY "${TARGET_NAME}_ARGS" "${cc_test_ARGS}")
#message("${TARGET_NAME}_ARGS arg ${arg}")
endif()
get_property(test_srcs GLOBAL PROPERTY TEST_SRCS)
set(test_srcs ${test_srcs} "${CMAKE_CURRENT_SOURCE_DIR}/${cc_test_SRCS}")
set_property(GLOBAL PROPERTY TEST_SRCS "${test_srcs}")
get_property(test_names GLOBAL PROPERTY TEST_NAMES)
set(test_names ${test_names} ${TARGET_NAME})
set_property(GLOBAL PROPERTY TEST_NAMES "${test_names}")
endif()
endif()
endfunction()
function(cc_test_old TARGET_NAME)
if(WITH_TESTING) if(WITH_TESTING)
set(oneValueArgs "") set(oneValueArgs "")
set(multiValueArgs SRCS DEPS ARGS) set(multiValueArgs SRCS DEPS ARGS)
...@@ -521,17 +547,7 @@ function(cc_test TARGET_NAME) ...@@ -521,17 +547,7 @@ function(cc_test TARGET_NAME)
cc_test_build(${TARGET_NAME} SRCS ${cc_test_SRCS} DEPS ${cc_test_DEPS}) cc_test_build(${TARGET_NAME} SRCS ${cc_test_SRCS} DEPS ${cc_test_DEPS})
# we dont test hcom op, because it need complex configuration # we dont test hcom op, because it need complex configuration
# with more than one machine # with more than one machine
if(NOT cc_test_run(${TARGET_NAME} COMMAND ${TARGET_NAME} ARGS ${cc_test_ARGS})
("${TARGET_NAME}" STREQUAL "c_broadcast_op_npu_test"
OR "${TARGET_NAME}" STREQUAL "c_allreduce_sum_op_npu_test"
OR "${TARGET_NAME}" STREQUAL "c_allreduce_max_op_npu_test"
OR "${TARGET_NAME}" STREQUAL "c_reducescatter_op_npu_test"
OR "${TARGET_NAME}" STREQUAL "c_allgather_op_npu_test"
OR "${TARGET_NAME}" STREQUAL "send_v2_op_npu_test"
OR "${TARGET_NAME}" STREQUAL "c_reduce_sum_op_npu_test"
OR "${TARGET_NAME}" STREQUAL "recv_v2_op_npu_test"))
cc_test_run(${TARGET_NAME} COMMAND ${TARGET_NAME} ARGS ${cc_test_ARGS})
endif()
elseif(WITH_TESTING AND NOT TEST ${TARGET_NAME}) elseif(WITH_TESTING AND NOT TEST ${TARGET_NAME})
add_test(NAME ${TARGET_NAME} COMMAND ${CMAKE_COMMAND} -E echo CI skip add_test(NAME ${TARGET_NAME} COMMAND ${CMAKE_COMMAND} -E echo CI skip
${TARGET_NAME}.) ${TARGET_NAME}.)
...@@ -625,25 +641,9 @@ function(nv_test TARGET_NAME) ...@@ -625,25 +641,9 @@ function(nv_test TARGET_NAME)
# Reference: https://cmake.org/cmake/help/v3.10/module/FindCUDA.html # Reference: https://cmake.org/cmake/help/v3.10/module/FindCUDA.html
add_executable(${TARGET_NAME} ${nv_test_SRCS}) add_executable(${TARGET_NAME} ${nv_test_SRCS})
get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES) get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
target_link_libraries( target_link_libraries(${TARGET_NAME} ${nv_test_DEPS}
${TARGET_NAME} ${os_dependency_modules} paddle_gtest_main phi)
${nv_test_DEPS} add_dependencies(${TARGET_NAME} ${nv_test_DEPS} paddle_gtest_main)
paddle_gtest_main
lod_tensor
memory
gtest
gflags
glog
${os_dependency_modules})
add_dependencies(
${TARGET_NAME}
${nv_test_DEPS}
paddle_gtest_main
lod_tensor
memory
gtest
gflags
glog)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT set_property(TEST ${TARGET_NAME} PROPERTY ENVIRONMENT
...@@ -746,8 +746,8 @@ function(hip_test TARGET_NAME) ...@@ -746,8 +746,8 @@ function(hip_test TARGET_NAME)
lod_tensor lod_tensor
memory memory
gtest gtest
gflags
glog glog
phi
${os_dependency_modules}) ${os_dependency_modules})
add_dependencies( add_dependencies(
${TARGET_NAME} ${TARGET_NAME}
...@@ -756,7 +756,7 @@ function(hip_test TARGET_NAME) ...@@ -756,7 +756,7 @@ function(hip_test TARGET_NAME)
lod_tensor lod_tensor
memory memory
gtest gtest
gflags phi
glog) glog)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
...@@ -853,7 +853,7 @@ function(xpu_test TARGET_NAME) ...@@ -853,7 +853,7 @@ function(xpu_test TARGET_NAME)
lod_tensor lod_tensor
memory memory
gtest gtest
gflags phi
glog glog
${os_dependency_modules}) ${os_dependency_modules})
add_dependencies( add_dependencies(
...@@ -863,7 +863,7 @@ function(xpu_test TARGET_NAME) ...@@ -863,7 +863,7 @@ function(xpu_test TARGET_NAME)
lod_tensor lod_tensor
memory memory
gtest gtest
gflags phi
glog) glog)
common_link(${TARGET_NAME}) common_link(${TARGET_NAME})
add_test(${TARGET_NAME} ${TARGET_NAME}) add_test(${TARGET_NAME} ${TARGET_NAME})
...@@ -1025,8 +1025,8 @@ function(paddle_protobuf_generate_cpp SRCS HDRS) ...@@ -1025,8 +1025,8 @@ function(paddle_protobuf_generate_cpp SRCS HDRS)
add_custom_command( add_custom_command(
OUTPUT "${_protobuf_protoc_src}" "${_protobuf_protoc_hdr}" OUTPUT "${_protobuf_protoc_src}" "${_protobuf_protoc_hdr}"
COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}" COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} -I${CMAKE_CURRENT_SOURCE_DIR} COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} -I${PADDLE_SOURCE_DIR} --cpp_out
--cpp_out "${CMAKE_CURRENT_BINARY_DIR}" ${ABS_FIL} "${PADDLE_BINARY_DIR}" ${ABS_FIL}
# Set `EXTERN_PROTOBUF_DEPEND` only if need to compile `protoc.exe`. # Set `EXTERN_PROTOBUF_DEPEND` only if need to compile `protoc.exe`.
DEPENDS ${ABS_FIL} ${EXTERN_PROTOBUF_DEPEND} DEPENDS ${ABS_FIL} ${EXTERN_PROTOBUF_DEPEND}
COMMENT "Running C++ protocol buffer compiler on ${FIL}" COMMENT "Running C++ protocol buffer compiler on ${FIL}"
......
...@@ -96,6 +96,7 @@ list(APPEND HIP_CXX_FLAGS -fPIC) ...@@ -96,6 +96,7 @@ list(APPEND HIP_CXX_FLAGS -fPIC)
list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1) list(APPEND HIP_CXX_FLAGS -D__HIP_PLATFORM_HCC__=1)
# Note(qili93): HIP has compile conflicts of float16.h as platform::float16 overload std::is_floating_point and std::is_integer # Note(qili93): HIP has compile conflicts of float16.h as platform::float16 overload std::is_floating_point and std::is_integer
list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1) list(APPEND HIP_CXX_FLAGS -D__HIP_NO_HALF_CONVERSIONS__=1)
list(APPEND HIP_CXX_FLAGS -mllvm -amdgpu-enable-flat-scratch=false)
list(APPEND HIP_CXX_FLAGS -Wno-macro-redefined) list(APPEND HIP_CXX_FLAGS -Wno-macro-redefined)
list(APPEND HIP_CXX_FLAGS -Wno-inconsistent-missing-override) list(APPEND HIP_CXX_FLAGS -Wno-inconsistent-missing-override)
list(APPEND HIP_CXX_FLAGS -Wno-exceptions) list(APPEND HIP_CXX_FLAGS -Wno-exceptions)
...@@ -106,7 +107,21 @@ list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier) ...@@ -106,7 +107,21 @@ list(APPEND HIP_CXX_FLAGS -Wno-duplicate-decl-specifier)
list(APPEND HIP_CXX_FLAGS -Wno-implicit-int-float-conversion) list(APPEND HIP_CXX_FLAGS -Wno-implicit-int-float-conversion)
list(APPEND HIP_CXX_FLAGS -Wno-pass-failed) list(APPEND HIP_CXX_FLAGS -Wno-pass-failed)
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP) list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
list(APPEND HIP_CXX_FLAGS -std=c++14) list(APPEND HIP_CXX_FLAGS -Wno-unused-result)
list(APPEND HIP_CXX_FLAGS -Wno-deprecated-declarations)
list(APPEND HIP_CXX_FLAGS -Wno-format)
list(APPEND HIP_CXX_FLAGS -Wno-dangling-gsl)
list(APPEND HIP_CXX_FLAGS -Wno-unused-value)
list(APPEND HIP_CXX_FLAGS -Wno-braced-scalar-init)
list(APPEND HIP_CXX_FLAGS -Wno-return-type)
list(APPEND HIP_CXX_FLAGS -Wno-pragma-once-outside-header)
if(WITH_CINN)
list(APPEND HIP_CXX_FLAGS -std=c++14)
else()
list(APPEND HIP_CXX_FLAGS -std=c++17)
endif()
list(APPEND HIP_CXX_FLAGS --gpu-max-threads-per-block=1024)
if(CMAKE_BUILD_TYPE MATCHES Debug) if(CMAKE_BUILD_TYPE MATCHES Debug)
list(APPEND HIP_CXX_FLAGS -g2) list(APPEND HIP_CXX_FLAGS -g2)
...@@ -119,11 +134,11 @@ set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS}) ...@@ -119,11 +134,11 @@ set(HIP_CLANG_FLAGS ${HIP_CXX_FLAGS})
# Ask hcc to generate device code during compilation so we can use # Ask hcc to generate device code during compilation so we can use
# host linker to link. # host linker to link.
list(APPEND HIP_HCC_FLAGS -fno-gpu-rdc) list(APPEND HIP_HCC_FLAGS -fno-gpu-rdc)
list(APPEND HIP_HCC_FLAGS --amdgpu-target=gfx906) list(APPEND HIP_HCC_FLAGS --offload-arch=gfx906)
list(APPEND HIP_HCC_FLAGS --amdgpu-target=gfx926) list(APPEND HIP_HCC_FLAGS --offload-arch=gfx926) # gfx926 for DCU
list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc) list(APPEND HIP_CLANG_FLAGS -fno-gpu-rdc)
list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx906) list(APPEND HIP_CLANG_FLAGS --offload-arch=gfx906)
list(APPEND HIP_CLANG_FLAGS --amdgpu-target=gfx926) list(APPEND HIP_CLANG_FLAGS --offload-arch=gfx926) # gfx926 for DCU
if(HIP_COMPILER STREQUAL clang) if(HIP_COMPILER STREQUAL clang)
set(hip_library_name amdhip64) set(hip_library_name amdhip64)
......
...@@ -12,11 +12,7 @@ ...@@ -12,11 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
# make package for paddle fluid shared and static library # make package for paddle inference shared and static library
set(PADDLE_INSTALL_DIR
"${CMAKE_BINARY_DIR}/paddle_install_dir"
CACHE STRING "A path setting paddle shared and static libraries")
set(PADDLE_INFERENCE_INSTALL_DIR set(PADDLE_INFERENCE_INSTALL_DIR
"${CMAKE_BINARY_DIR}/paddle_inference_install_dir" "${CMAKE_BINARY_DIR}/paddle_inference_install_dir"
CACHE STRING "A path setting paddle inference shared and static libraries") CACHE STRING "A path setting paddle inference shared and static libraries")
...@@ -134,17 +130,9 @@ function(copy_part_of_thrid_party TARGET DST) ...@@ -134,17 +130,9 @@ function(copy_part_of_thrid_party TARGET DST)
add_custom_command( add_custom_command(
TARGET ${TARGET} TARGET ${TARGET}
POST_BUILD POST_BUILD
COMMAND strip -s ${dst_dir}/lib/libmkldnn.so.0 COMMAND strip -s ${dst_dir}/lib/libdnnl.so.3
COMMENT "striping libmkldnn.so.0") COMMENT "striping libdnnl.so.3")
endif() endif()
add_custom_command(
TARGET ${TARGET}
POST_BUILD
COMMAND ${CMAKE_COMMAND} -E create_symlink libmkldnn.so.0
${dst_dir}/lib/libdnnl.so.1
COMMAND ${CMAKE_COMMAND} -E create_symlink libmkldnn.so.0
${dst_dir}/lib/libdnnl.so.2
COMMENT "Make a symbol link of libmkldnn.so.0")
endif() endif()
endif() endif()
...@@ -214,7 +202,7 @@ endfunction() ...@@ -214,7 +202,7 @@ endfunction()
# inference library for only inference # inference library for only inference
set(inference_lib_deps third_party paddle_inference paddle_inference_c set(inference_lib_deps third_party paddle_inference paddle_inference_c
paddle_inference_shared paddle_inference_c_shared) paddle_inference_shared paddle_inference_c_shared)
add_custom_target(inference_lib_dist DEPENDS ${inference_lib_deps}) add_custom_target(inference_lib_dist ALL DEPENDS ${inference_lib_deps})
set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/threadpool") set(dst_dir "${PADDLE_INFERENCE_INSTALL_DIR}/third_party/threadpool")
copy( copy(
...@@ -273,6 +261,20 @@ else() ...@@ -273,6 +261,20 @@ else()
SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib} SRCS ${src_dir}/inference/api/paddle_*.h ${paddle_inference_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib) ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
if(WITH_SHARED_PHI)
set(paddle_phi_lib ${PADDLE_BINARY_DIR}/paddle/phi/libphi.*)
copy(
inference_lib_dist
SRCS ${paddle_phi_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
endif()
if(WITH_SHARED_IR)
set(paddle_ir_lib ${PADDLE_BINARY_DIR}/paddle/ir/libir.*)
copy(
inference_lib_dist
SRCS ${paddle_ir_lib}
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/lib)
endif()
endif() endif()
copy( copy(
...@@ -289,50 +291,56 @@ include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io) ...@@ -289,50 +291,56 @@ include_directories(${CMAKE_BINARY_DIR}/../paddle/fluid/framework/io)
copy( copy(
inference_lib_dist inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/ext/*.h SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/ext/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/ext/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api/ext/)
copy( copy(
inference_lib_dist inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/include/*.h SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/include/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/include/ DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api/include/)
)
copy( copy(
inference_lib_dist inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/all.h SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/api/all.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api/)
copy( copy(
inference_lib_dist inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/common/*.h SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/common/*.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/common/)
copy( copy(
inference_lib_dist inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/macros.h SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/macros.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core/)
copy( copy(
inference_lib_dist inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/visit_type.h SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/visit_type.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/phi/core/hostdevice.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core/)
copy(
inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/fluid/platform/init_phi.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/)
copy( copy(
inference_lib_dist inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/any.h SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/any.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy( copy(
inference_lib_dist inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/optional.h SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/optional.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy( copy(
inference_lib_dist inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/none.h SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/none.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy( copy(
inference_lib_dist inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flat_hash_map.h SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/flat_hash_map.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/utils/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/utils/)
copy( copy(
inference_lib_dist inference_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h SRCS ${PADDLE_SOURCE_DIR}/paddle/extension.h
DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/) DSTS ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/)
# the header file of phi is copied to the experimental directory,
# the include path of phi needs to be changed to adapt to inference api path # the include path of phi needs to be changed to adapt to inference api path
add_custom_command( add_custom_command(
TARGET inference_lib_dist TARGET inference_lib_dist
...@@ -375,170 +383,5 @@ if(WITH_STRIP AND NOT WIN32) ...@@ -375,170 +383,5 @@ if(WITH_STRIP AND NOT WIN32)
) )
endif() endif()
# fluid library for both train and inference
# Defines the `fluid_lib_dist` packaging target. It is part of the default
# build (ALL) and depends on `inference_lib_dist`. Each copy() call below
# registers files to be placed under ${PADDLE_INSTALL_DIR}/paddle/fluid.
# NOTE(review): copy() is defined outside this excerpt; it presumably pairs
# the i-th SRCS entry with the i-th DSTS entry - confirm against its definition.
set(fluid_lib_deps inference_lib_dist)
add_custom_target(fluid_lib_dist ALL DEPENDS ${fluid_lib_deps})
# `dst_dir` and `module` are re-assigned before each group of copy() calls,
# so the statements in this section are order dependent.
set(dst_dir "${PADDLE_INSTALL_DIR}/paddle/fluid")
# inference: public headers plus the built inference library.
set(module "inference")
if(WIN32)
# NOTE(review): four DSTS entries are listed here versus three in the
# non-Windows branch; presumably ${paddle_inference_lib} expands to two
# entries on Windows - confirm.
copy(
fluid_lib_dist
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h
${paddle_inference_lib}
DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module}
${dst_dir}/${module})
else()
copy(
fluid_lib_dist
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/api/paddle_*.h
${paddle_inference_lib}
DSTS ${dst_dir}/${module} ${dst_dir}/${module} ${dst_dir}/${module})
endif()
# framework: source headers plus the generated *.pb.h files taken from the
# binary tree. The proto targets are added as dependencies of fluid_lib_dist
# so they are built before the packaging target.
set(module "framework")
set(framework_lib_deps framework_proto data_feed_proto trainer_desc_proto)
add_dependencies(fluid_lib_dist ${framework_lib_deps})
copy(
fluid_lib_dist
SRCS ${src_dir}/${module}/*.h
${src_dir}/${module}/details/*.h
${PADDLE_BINARY_DIR}/paddle/fluid/framework/trainer_desc.pb.h
${PADDLE_BINARY_DIR}/paddle/fluid/framework/framework.pb.h
${PADDLE_BINARY_DIR}/paddle/fluid/framework/data_feed.pb.h
${src_dir}/${module}/ir/memory_optimize_pass/*.h
${src_dir}/${module}/ir/*.h
${src_dir}/${module}/fleet/*.h
DSTS ${dst_dir}/${module}
${dst_dir}/${module}/details
${dst_dir}/${module}
${dst_dir}/${module}
${dst_dir}/${module}
${dst_dir}/${module}/ir/memory_optimize_pass
${dst_dir}/${module}/ir
${dst_dir}/${module}/fleet)
# operators: only reader/blocking_queue.h is packaged.
set(module "operators")
copy(
fluid_lib_dist
SRCS ${src_dir}/${module}/reader/blocking_queue.h
DSTS ${dst_dir}/${module}/reader/)
# memory: allocation headers.
set(module "memory")
copy(
fluid_lib_dist
SRCS ${src_dir}/${module}/allocation/*.h
DSTS ${dst_dir}/${module}/allocation)
# platform: headers plus generated *.pb.h; external_error_proto is only
# added as a dependency when WITH_GPU is enabled.
set(module "platform")
set(platform_lib_deps profiler_proto errors)
if(WITH_GPU)
set(platform_lib_deps ${platform_lib_deps} external_error_proto)
endif()
add_dependencies(fluid_lib_dist ${platform_lib_deps})
copy(
fluid_lib_dist
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/dynload/*.h
${src_dir}/${module}/details/*.h
${PADDLE_BINARY_DIR}/paddle/fluid/platform/*.pb.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module}/dynload
${dst_dir}/${module}/details ${dst_dir}/${module})
# string: these headers come from paddle/utils/string, not from ${src_dir}.
set(module "string")
copy(
fluid_lib_dist
SRCS ${PADDLE_SOURCE_DIR}/paddle/utils/${module}/*.h
${PADDLE_SOURCE_DIR}/paddle/utils/${module}/tinyformat/*.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module}/tinyformat)
# imperative: top-level and jit headers.
set(module "imperative")
copy(
fluid_lib_dist
SRCS ${src_dir}/${module}/*.h ${src_dir}/${module}/jit/*.h
DSTS ${dst_dir}/${module} ${dst_dir}/${module}/jit)
# pybind: pybind.h is read from the binary tree.
set(module "pybind")
copy(
fluid_lib_dist
SRCS ${CMAKE_CURRENT_BINARY_DIR}/paddle/fluid/${module}/pybind.h
DSTS ${dst_dir}/${module})
# Third-party headers/libraries placed under ${PADDLE_INSTALL_DIR}/third_party.
# NOTE(review): the next three copy() calls are attached to
# `inference_lib_dist` although their destinations are under
# PADDLE_INSTALL_DIR rather than PADDLE_INFERENCE_INSTALL_DIR - confirm this
# is intentional.
set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/eigen3")
copy(
inference_lib_dist
SRCS ${EIGEN_INCLUDE_DIR}/Eigen/Core ${EIGEN_INCLUDE_DIR}/Eigen/src
${EIGEN_INCLUDE_DIR}/unsupported/Eigen
DSTS ${dst_dir}/Eigen ${dst_dir}/Eigen ${dst_dir}/unsupported)
set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/dlpack")
copy(
inference_lib_dist
SRCS ${DLPACK_INCLUDE_DIR}/dlpack
DSTS ${dst_dir})
set(dst_dir "${PADDLE_INSTALL_DIR}/third_party/install/zlib")
copy(
inference_lib_dist
SRCS ${ZLIB_INCLUDE_DIR} ${ZLIB_LIBRARIES}
DSTS ${dst_dir} ${dst_dir}/lib)
# CMakeCache Info
# Copies the third_party directory of the inference install tree and the
# CMakeCache.txt of the current binary directory into PADDLE_INSTALL_DIR.
copy(
fluid_lib_dist
SRCS ${PADDLE_INFERENCE_INSTALL_DIR}/third_party
${CMAKE_CURRENT_BINARY_DIR}/CMakeCache.txt
DSTS ${PADDLE_INSTALL_DIR} ${PADDLE_INSTALL_DIR})
# version(<version_file>)
#
# Writes a plain-text build summary to <version_file>, replacing any previous
# content. The summary always contains the git commit hash of
# PADDLE_SOURCE_DIR, the main WITH_* switches and the C++ compiler version.
# Backend specific version lines (CUDA/cuDNN, HIP/MIOpen, Ascend, PopART,
# TensorRT, Lite) are added only when the matching option/variable is true.
# The file is written at configure time, when this function is called.
function(version version_file)
  # Hash of the most recent commit in the source tree.
  execute_process(
    COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
    OUTPUT_VARIABLE PADDLE_GIT_COMMIT)

  # Build the whole report in memory and write it once at the end.
  set(report "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n")
  set(report "${report}WITH_MKL: ${WITH_MKL}\n")
  set(report "${report}WITH_MKLDNN: ${WITH_MKLDNN}\n")
  set(report "${report}WITH_GPU: ${WITH_GPU}\n")
  set(report "${report}WITH_ROCM: ${WITH_ROCM}\n")
  set(report "${report}WITH_ASCEND_CL: ${WITH_ASCEND_CL}\n")
  set(report "${report}WITH_ASCEND_CXX11: ${WITH_ASCEND_CXX11}\n")
  set(report "${report}WITH_IPU: ${WITH_IPU}\n")

  if(WITH_GPU)
    set(report "${report}CUDA version: ${CUDA_VERSION}\n")
    set(report
        "${report}CUDNN version: v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}\n"
    )
  endif()

  if(WITH_ROCM)
    set(report
        "${report}HIP version: v${HIP_MAJOR_VERSION}.${HIP_MINOR_VERSION}\n")
    set(report
        "${report}MIOpen version: v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}\n"
    )
  endif()

  if(WITH_ASCEND_CL)
    set(report "${report}Ascend Toolkit version: ${ASCEND_TOOLKIT_VERSION}\n")
    set(report "${report}Ascend Driver version: ${ASCEND_DRIVER_VERSION}\n")
  endif()

  if(WITH_IPU)
    set(report "${report}PopART version: ${POPART_VERSION}\n")
  endif()

  set(report "${report}CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n")

  if(TENSORRT_FOUND)
    set(report "${report}WITH_TENSORRT: ${TENSORRT_FOUND}\n")
    set(report
        "${report}TensorRT version: v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION}\n"
    )
  endif()

  if(WITH_LITE)
    set(report "${report}WITH_LITE: ${WITH_LITE}\n")
    set(report "${report}LITE_GIT_TAG: ${LITE_GIT_TAG}\n")
  endif()

  file(WRITE ${version_file} "${report}")
endfunction()
version(${PADDLE_INSTALL_DIR}/version.txt)
version(${PADDLE_INFERENCE_INSTALL_DIR}/version.txt) version(${PADDLE_INFERENCE_INSTALL_DIR}/version.txt)
version(${PADDLE_INFERENCE_C_INSTALL_DIR}/version.txt) version(${PADDLE_INFERENCE_C_INSTALL_DIR}/version.txt)
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Root directory of the InfRT install tree; used as the destination root by
# the packaging rules in this file. Stored in the cache, so a value supplied
# on the command line (-DINFRT_INSTALL_DIR=...) takes precedence over the
# default inside the build directory.
set(INFRT_INSTALL_DIR
"${CMAKE_BINARY_DIR}/paddle_infrt_install_dir"
CACHE STRING "A path setting paddle infrt shared and static libraries")
# copy(<target> SRCS <src>... DSTS <dst>...)
#
# Attaches one POST_BUILD step per SRCS entry to <target>: the destination
# directory at the same index in DSTS is created with `mkdir -p`, then the
# source is copied into it with `cp -r`.
# Configuration aborts with FATAL_ERROR when SRCS and DSTS differ in length.
#
# NOTE(review): callers in this file pass wildcard sources such as
# `libinfrt.*`; these presumably rely on the build shell expanding the
# pattern, so the commands are kept as plain `mkdir`/`cp` without VERBATIM.
function(copy TARGET)
  cmake_parse_arguments(copy_args "" "" "SRCS;DSTS" ${ARGN})

  list(LENGTH copy_args_SRCS src_count)
  list(LENGTH copy_args_DSTS dst_count)
  if(NOT ${src_count} EQUAL ${dst_count})
    message(
      FATAL_ERROR
        "${TARGET} source numbers are not equal to destination numbers")
  endif()

  # Walk both lists in lock-step by index.
  math(EXPR last_index "${src_count} - 1")
  foreach(i RANGE ${last_index})
    list(GET copy_args_SRCS ${i} from)
    list(GET copy_args_DSTS ${i} to)
    add_custom_command(
      TARGET ${TARGET}
      POST_BUILD
      COMMAND mkdir -p "${to}"
      COMMAND cp -r "${from}" "${to}"
      COMMENT "copying ${from} -> ${to}")
  endforeach()
endfunction()
# copy_part_of_thrid_party(<target> <dst>)
#
# Registers, on <target>, the copy of the glog include directory and glog
# libraries into <dst>/third_party/install/glog and its lib/ subdirectory.
function(copy_part_of_thrid_party TARGET DST)
  set(glog_root "${DST}/third_party/install/glog")
  copy(
    ${TARGET}
    SRCS ${GLOG_INCLUDE_DIR} ${GLOG_LIBRARIES}
    DSTS ${glog_root} ${glog_root}/lib)
endfunction()
# inference library for only inference
# `infrt_lib_dist` is a packaging target that depends on the third_party,
# infrt and infrt_static targets. It is not declared with ALL, so it is only
# built when requested explicitly or as a dependency of another target.
set(infrt_lib_deps third_party infrt infrt_static)
add_custom_target(infrt_lib_dist DEPENDS ${infrt_lib_deps})
# CMakeCache Info
# Keep the cache of this build next to the packaged files.
copy(
infrt_lib_dist
SRCS ${CMAKE_BINARY_DIR}/CMakeCache.txt
DSTS ${INFRT_INSTALL_DIR})
# Public API header goes to infrt/include; every file matching libinfrt.*
# in INFRT_BINARY_DIR goes to infrt/lib.
set(infrt_lib ${INFRT_BINARY_DIR}/libinfrt.*)
copy(
infrt_lib_dist
SRCS ${INFRT_SOURCE_DIR}/api/infrt_api.h ${infrt_lib}
DSTS ${INFRT_INSTALL_DIR}/infrt/include ${INFRT_INSTALL_DIR}/infrt/lib)
# Generated protobuf header, packaged under include/internal.
copy(
infrt_lib_dist
SRCS ${INFRT_BINARY_DIR}/paddle/framework.pb.h
DSTS ${INFRT_INSTALL_DIR}/infrt/include/internal)
# version(<version_file>)
#
# Writes a two-line build summary to <version_file>, replacing any previous
# content: the git commit hash of PADDLE_SOURCE_DIR and the C++ compiler
# version. Runs at configure time.
function(version version_file)
  # Hash of the most recent commit in the source tree.
  execute_process(
    COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
    OUTPUT_VARIABLE PADDLE_GIT_COMMIT)
  file(WRITE ${version_file}
       "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
       "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n")
endfunction()

# Record the build summary at the root of the InfRT install tree.
version(${INFRT_INSTALL_DIR}/version.txt)
...@@ -20,6 +20,15 @@ res = sys.argv[1] ...@@ -20,6 +20,15 @@ res = sys.argv[1]
out = sys.argv[2] out = sys.argv[2]
var = re.sub(r'[ .-]', '_', os.path.basename(res)) var = re.sub(r'[ .-]', '_', os.path.basename(res))
open(out, "w").write("const unsigned char " + var + "[] = {" + ",".join([ open(out, "w").write(
"0x%02x" % ord(c) for c in open(res).read() "const unsigned char "
]) + ",0};\n" + "const unsigned " + var + "_size = sizeof(" + var + ");\n") + var
+ "[] = {"
+ ",".join(["0x%02x" % ord(c) for c in open(res).read()])
+ ",0};\n"
+ "const unsigned "
+ var
+ "_size = sizeof("
+ var
+ ");\n"
)
# MPI support is only configured when both WITH_DISTRIBUTE and WITH_MPI are on.
if(NOT WITH_DISTRIBUTE OR NOT WITH_MPI)
return()
endif()
find_package(MPI)
# Without a C++ MPI installation, force WITH_MPI to OFF in the cache so the
# rest of the build sees MPI as disabled, warn, and stop processing this file.
if(NOT MPI_CXX_FOUND)
set(WITH_MPI
OFF
CACHE STRING "Disable MPI" FORCE)
message(WARNING "Not found MPI support in current system")
return()
endif()
message(STATUS "MPI compile flags: " ${MPI_CXX_COMPILE_FLAGS})
message(STATUS "MPI include path: " ${MPI_CXX_INCLUDE_PATH})
message(STATUS "MPI LINK flags path: " ${MPI_CXX_LINK_FLAGS})
message(STATUS "MPI libraries: " ${MPI_CXX_LIBRARIES})
# The settings below are directory-scoped: they apply to every target
# defined after this file is included.
include_directories(SYSTEM ${MPI_CXX_INCLUDE_PATH})
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${MPI_CXX_LINK_FLAGS}")
add_definitions("-DPADDLE_WITH_MPI")
# Look for the `ompi_info` tool to probe the MPI installation.
# NOTE(review): the hint is built from MPI_CXX_LIBRARIES, which may hold
# library file paths (possibly several) rather than a directory - confirm
# that "<value>/../bin" resolves as intended.
find_program(
OMPI_INFO
NAMES ompi_info
HINTS ${MPI_CXX_LIBRARIES}/../bin)
if(OMPI_INFO)
execute_process(COMMAND ${OMPI_INFO} OUTPUT_VARIABLE output_)
if(output_ MATCHES "smcuda")
# NOTE: some MPI libraries are CUDA-aware; an `ompi_info` output that
# mentions "smcuda" is taken as the indicator.
add_definitions("-DPADDLE_WITH_MPI_AWARE")
endif()
endif()
# Locates the Neuware toolkit and exposes its libraries through the
# `neuware_lib` target. Does nothing unless WITH_MLU is enabled.
if(NOT WITH_MLU)
  return()
endif()

# Toolkit root: the NEUWARE_HOME environment variable when it is set to a
# non-empty value, otherwise /usr/local/neuware.
# An environment variable cannot be tested as `if(ENV{VAR})` - that form
# always evaluates to false - so it is checked with DEFINED plus an explicit
# empty-string comparison.
if(DEFINED ENV{NEUWARE_HOME} AND NOT "$ENV{NEUWARE_HOME}" STREQUAL "")
  set(NEUWARE_HOME $ENV{NEUWARE_HOME})
else()
  set(NEUWARE_HOME "/usr/local/neuware")
endif()
message(STATUS "NEUWARE_HOME: " ${NEUWARE_HOME})

set(NEUWARE_INCLUDE_DIR ${NEUWARE_HOME}/include)
set(NEUWARE_LIB_DIR ${NEUWARE_HOME}/lib64)
# Directory-scoped: applies to every target defined after this file is
# included.
include_directories(${NEUWARE_INCLUDE_DIR})

# Full paths of the shared libraries expected under ${NEUWARE_HOME}/lib64.
set(CNNL_LIB ${NEUWARE_LIB_DIR}/libcnnl.so)
set(MLUOP_LIB ${NEUWARE_LIB_DIR}/libmluops.so)
set(CNRT_LIB ${NEUWARE_LIB_DIR}/libcnrt.so)
set(CNDRV_LIB ${NEUWARE_LIB_DIR}/libcndrv.so)
set(CNPAPI_LIB ${NEUWARE_LIB_DIR}/libcnpapi.so)

# `neuware_lib` is created by generate_dummy_static_lib (defined elsewhere)
# and linked against the toolkit libraries at the end of this file.
generate_dummy_static_lib(LIB_NAME "neuware_lib" GENERATOR "neuware.cmake")
set(NEUWARE_LIB_DEPS ${CNNL_LIB} ${MLUOP_LIB} ${CNRT_LIB} ${CNDRV_LIB}
                     ${CNPAPI_LIB})

# Optional CNCL support: adds the PADDLE_WITH_CNCL definition and libcncl.so.
if(WITH_CNCL)
  message(STATUS "Compile with CNCL!")
  add_definitions(-DPADDLE_WITH_CNCL)
  set(CNCL_LIB ${NEUWARE_LIB_DIR}/libcncl.so)
  list(APPEND NEUWARE_LIB_DEPS ${CNCL_LIB})
endif()

target_link_libraries(neuware_lib ${NEUWARE_LIB_DEPS})
...@@ -26,6 +26,124 @@ function(find_register FILENAME PATTERN OUTPUT) ...@@ -26,6 +26,124 @@ function(find_register FILENAME PATTERN OUTPUT)
PARENT_SCOPE) PARENT_SCOPE)
endfunction() endfunction()
# find_phi_register(<FILENAME> <ADD_PATH> <PATTERN>)
#
# Searches FILENAME for the first text of the form
#   <PATTERN>(<name>, <backend>, <layout>
# and, when one is found, appends the line
#   PD_DECLARE_KERNEL(<name>, <backend>, <layout>);
# to the file ADD_PATH. Nothing is written when there is no match.
# Only the first match is handled because the search uses REGEX MATCH.
function(find_phi_register FILENAME ADD_PATH PATTERN)
  file(READ ${FILENAME} file_text)
  string(
    REGEX
      MATCH
      "${PATTERN}\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z]*,[ \\\t\r\n]*[A-Z_]*"
      hit
      "${file_text}")
  if(hit STREQUAL "")
    return()
  endif()

  # Drop the macro name and opening parenthesis, then turn the comma
  # separated arguments into a CMake list.
  string(REPLACE "${PATTERN}(" "" hit "${hit}")
  string(REPLACE "," ";" hit "${hit}")
  # Remove blanks and line-continuation backslashes, then the literal
  # "//cuda_only" marker if it is left inside the matched text.
  string(REGEX REPLACE "[ \\\t\r\n]+" "" hit "${hit}")
  string(REGEX REPLACE "//cuda_only" "" hit "${hit}")

  list(GET hit 0 name)
  list(GET hit 1 backend)
  list(GET hit 2 layout)
  file(APPEND ${ADD_PATH}
       "PD_DECLARE_KERNEL(${name}, ${backend}, ${layout});\n")
endfunction()
# Just for those gpu kernels locating at "fluid/operators/", such as 'class_center_sample_op.cu'.
# Add other file modes if need in the future.
#
# register_cu_kernel(<target> SRCS <file>... [DEPS <dep>...])
#
# Builds <target> from the *.cu files in SRCS (nv_library with WITH_GPU,
# hip_library with WITH_ROCM), prepends <target> to the cached OP_LIBRARY
# list and appends the kernel declarations found in each source to
# ${pybind_file}, which must be set by the caller's scope.
# Fails at configure time when SRCS contains no *.cu file.
# NOTE(review): when neither WITH_GPU nor WITH_ROCM is set no library is
# created, yet <target> is still added to OP_LIBRARY - confirm callers guard
# for that.
function(register_cu_kernel TARGET)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(register_cu_kernel "${options}" "${oneValueArgs}"
                        "${multiValueArgs}" ${ARGN})

  set(cu_srcs)
  set(op_common_deps operator op_registry layer common_infer_shape_functions)
  foreach(cu_src ${register_cu_kernel_SRCS})
    if(${cu_src} MATCHES ".*\\.cu$")
      list(APPEND cu_srcs ${cu_src})
    endif()
  endforeach()
  list(LENGTH cu_srcs cu_srcs_len)
  if(${cu_srcs_len} EQUAL 0)
    message(
      FATAL_ERROR
        "The GPU kernel file of ${TARGET} should contains at least one .cu file"
    )
  endif()

  # The DEPS given to this function are parsed into register_cu_kernel_DEPS.
  # op_library_DEPS is not set by this function; it is kept so that a value
  # visible from an enclosing scope keeps being honoured as before.
  set(kernel_deps ${register_cu_kernel_DEPS} ${op_library_DEPS}
                  ${op_common_deps})
  if(WITH_GPU)
    nv_library(
      ${TARGET}
      SRCS ${cu_srcs}
      DEPS ${kernel_deps})
  elseif(WITH_ROCM)
    hip_library(
      ${TARGET}
      SRCS ${cu_srcs}
      DEPS ${kernel_deps})
  endif()
  set(OP_LIBRARY
      ${TARGET} ${OP_LIBRARY}
      CACHE INTERNAL "op libs")

  foreach(cu_src ${cu_srcs})
    set(op_name "")
    # Add PHI Kernel Registry Message
    find_phi_register(${cu_src} ${pybind_file} "PD_REGISTER_KERNEL")
    find_phi_register(${cu_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
    find_phi_register(${cu_src} ${pybind_file}
                      "PD_REGISTER_KERNEL_FOR_ALL_DTYPE")
    find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name)
    # String comparison: op_name stays empty when no registration was found.
    if(NOT "${op_name}" STREQUAL "")
      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
    endif()
  endforeach()
endfunction()
# Just for those mkldnn kernels locating at "fluid/operators/mkldnn/", such as 'layer_norm_mkldnn_op.cc'.
# Add other file modes if need in the future.
#
# register_mkldnn_kernel(<target> SRCS <file>... [DEPS <dep>...])
#
# Builds <target> (cc_library, only with WITH_MKLDNN) from the SRCS entries
# whose name ends in _mkldnn_op.cc; each such entry is looked up under the
# mkldnn/ subdirectory. <target> is prepended to the cached OP_LIBRARY list
# and a USE_OP_DEVICE_KERNEL(<op>, MKLDNN) line is appended to ${pybind_file}
# (set by the caller's scope) for every source that registers an op kernel.
# Fails at configure time when SRCS contains no matching file.
# NOTE(review): when WITH_MKLDNN is off no library is created, yet <target>
# is still added to OP_LIBRARY - confirm callers guard for that.
function(register_mkldnn_kernel TARGET)
  set(options "")
  set(oneValueArgs "")
  set(multiValueArgs SRCS DEPS)
  cmake_parse_arguments(register_mkldnn_kernel "${options}" "${oneValueArgs}"
                        "${multiValueArgs}" ${ARGN})

  set(mkldnn_cc_srcs)
  set(op_common_deps operator op_registry phi layer
                     common_infer_shape_functions)
  foreach(mkldnn_src ${register_mkldnn_kernel_SRCS})
    if(${mkldnn_src} MATCHES ".*_mkldnn_op.cc$")
      list(APPEND mkldnn_cc_srcs mkldnn/${mkldnn_src})
    endif()
  endforeach()
  list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
  if(${mkldnn_cc_srcs_len} EQUAL 0)
    message(
      FATAL_ERROR
        "The MKLDNN kernel file of ${TARGET} should contains at least one *.*_mkldnn_op.cc file"
    )
  endif()

  if(WITH_MKLDNN)
    # The DEPS given to this function are parsed into
    # register_mkldnn_kernel_DEPS. op_library_DEPS is not set by this
    # function; it is kept so that a value visible from an enclosing scope
    # keeps being honoured as before.
    cc_library(
      ${TARGET}
      SRCS ${mkldnn_cc_srcs}
      DEPS ${register_mkldnn_kernel_DEPS} ${op_library_DEPS}
           ${op_common_deps})
  endif()
  set(OP_LIBRARY
      ${TARGET} ${OP_LIBRARY}
      CACHE INTERNAL "op libs")

  foreach(mkldnn_src ${mkldnn_cc_srcs})
    set(op_name "")
    find_register(${mkldnn_src} "REGISTER_OP_KERNEL" op_name)
    # String comparison: op_name stays empty when no registration was found.
    if(NOT "${op_name}" STREQUAL "")
      file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, MKLDNN);\n")
    endif()
  endforeach()
endfunction()
function(op_library TARGET) function(op_library TARGET)
# op_library is a function to create op library. The interface is same as # op_library is a function to create op library. The interface is same as
# cc_library. But it handle split GPU/CPU code and link some common library # cc_library. But it handle split GPU/CPU code and link some common library
...@@ -37,8 +155,6 @@ function(op_library TARGET) ...@@ -37,8 +155,6 @@ function(op_library TARGET)
set(hip_cc_srcs) set(hip_cc_srcs)
set(xpu_cc_srcs) set(xpu_cc_srcs)
set(xpu_kp_cc_srcs) set(xpu_kp_cc_srcs)
set(npu_cc_srcs)
set(mlu_cc_srcs)
set(cudnn_cu_cc_srcs) set(cudnn_cu_cc_srcs)
set(miopen_cu_cc_srcs) set(miopen_cu_cc_srcs)
set(cudnn_cu_srcs) set(cudnn_cu_srcs)
...@@ -47,14 +163,8 @@ function(op_library TARGET) ...@@ -47,14 +163,8 @@ function(op_library TARGET)
set(MIOPEN_FILE) set(MIOPEN_FILE)
set(mkldnn_cc_srcs) set(mkldnn_cc_srcs)
set(MKLDNN_FILE) set(MKLDNN_FILE)
set(op_common_deps operator op_registry math_function layer set(op_common_deps operator op_registry phi layer
common_infer_shape_functions) common_infer_shape_functions)
if(WITH_ASCEND_CL)
set(op_common_deps ${op_common_deps} npu_op_runner)
endif()
if(WITH_MLU)
set(op_common_deps ${op_common_deps} mlu_baseop)
endif()
# Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build. # Option `UNITY` is used to specify that operator `TARGET` will compiles with Unity Build.
set(options UNITY) set(options UNITY)
...@@ -83,9 +193,6 @@ function(op_library TARGET) ...@@ -83,9 +193,6 @@ function(op_library TARGET)
${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu) ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
list(APPEND cu_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu) list(APPEND cu_srcs ${CMAKE_CURRENT_BINARY_DIR}/${TARGET}.cu)
endif() endif()
if(WITH_NV_JETSON)
list(REMOVE_ITEM cu_srcs "decode_jpeg_op.cu")
endif()
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu) if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu)
set(PART_CUDA_KERNEL_FILES set(PART_CUDA_KERNEL_FILES
${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET}.part.cu
...@@ -150,18 +257,6 @@ function(op_library TARGET) ...@@ -150,18 +257,6 @@ function(op_library TARGET)
list(APPEND xpu_kp_cc_srcs ${TARGET}.kps) list(APPEND xpu_kp_cc_srcs ${TARGET}.kps)
endif() endif()
endif() endif()
if(WITH_ASCEND_CL)
string(REPLACE "_op" "_op_npu" NPU_FILE "${TARGET}")
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${NPU_FILE}.cc)
list(APPEND npu_cc_srcs ${NPU_FILE}.cc)
endif()
endif()
if(WITH_MLU)
string(REPLACE "_op" "_op_mlu" MLU_FILE "${TARGET}")
if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${MLU_FILE}.cc)
list(APPEND mlu_cc_srcs ${MLU_FILE}.cc)
endif()
endif()
else() else()
foreach(src ${op_library_SRCS}) foreach(src ${op_library_SRCS})
if(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu$") if(WITH_ROCM AND ${src} MATCHES ".*_cudnn_op.cu$")
...@@ -188,12 +283,18 @@ function(op_library TARGET) ...@@ -188,12 +283,18 @@ function(op_library TARGET)
list(APPEND xpu_kp_cc_srcs ${src}) list(APPEND xpu_kp_cc_srcs ${src})
elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.kps$") elseif(WITH_XPU_KP AND ${src} MATCHES ".*\\.kps$")
list(APPEND xpu_kp_cc_srcs ${src}) list(APPEND xpu_kp_cc_srcs ${src})
elseif(WITH_ASCEND_CL AND ${src} MATCHES ".*_op_npu.cc$")
list(APPEND npu_cc_srcs ${src})
elseif(WITH_MLU AND ${src} MATCHES ".*_op_mlu.cc$")
list(APPEND mlu_cc_srcs ${src})
elseif(${src} MATCHES ".*\\.cc$") elseif(${src} MATCHES ".*\\.cc$")
list(APPEND cc_srcs ${src}) list(APPEND cc_srcs ${src})
elseif((WITH_ROCM OR WITH_GPU) AND ${src} MATCHES ".*\\.kps$")
string(REPLACE ".kps" ".cu" src_cu ${src})
file(COPY ${src} DESTINATION ${CMAKE_CURRENT_BINARY_DIR})
file(RENAME ${CMAKE_CURRENT_BINARY_DIR}/${src}
${CMAKE_CURRENT_BINARY_DIR}/${src_cu})
if(WITH_ROCM)
list(APPEND hip_srcs ${CMAKE_CURRENT_BINARY_DIR}/${src_cu})
else()
list(APPEND cu_srcs ${CMAKE_CURRENT_BINARY_DIR}/${src_cu})
endif()
else() else()
message( message(
FATAL_ERROR FATAL_ERROR
...@@ -284,7 +385,7 @@ function(op_library TARGET) ...@@ -284,7 +385,7 @@ function(op_library TARGET)
list(REMOVE_ITEM hip_srcs "eigh_op.cu") list(REMOVE_ITEM hip_srcs "eigh_op.cu")
list(REMOVE_ITEM hip_srcs "lstsq_op.cu") list(REMOVE_ITEM hip_srcs "lstsq_op.cu")
list(REMOVE_ITEM hip_srcs "multinomial_op.cu") list(REMOVE_ITEM hip_srcs "multinomial_op.cu")
list(REMOVE_ITEM hip_srcs "decode_jpeg_op.cu") list(REMOVE_ITEM hip_srcs "multiclass_nms3_op.cu")
hip_library( hip_library(
${TARGET} ${TARGET}
SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs} SRCS ${cc_srcs} ${hip_cc_srcs} ${miopen_cu_cc_srcs} ${miopen_cu_srcs}
...@@ -296,24 +397,11 @@ function(op_library TARGET) ...@@ -296,24 +397,11 @@ function(op_library TARGET)
SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${xpu_kp_cc_srcs} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${xpu_kp_cc_srcs}
DEPS ${op_library_DEPS} ${op_common_deps}) DEPS ${op_library_DEPS} ${op_common_deps})
else() else()
# deal with CANN version control while registering NPU operators before build
if(WITH_ASCEND_CL)
if(CANN_VERSION LESS 504000)
list(REMOVE_ITEM npu_cc_srcs "multinomial_op_npu.cc")
list(REMOVE_ITEM npu_cc_srcs "take_along_axis_op_npu.cc")
endif()
endif()
# Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`. # Unity Build relies on global option `WITH_UNITY_BUILD` and local option `UNITY`.
if(WITH_UNITY_BUILD AND op_library_UNITY) if(WITH_UNITY_BUILD AND op_library_UNITY)
# Combine the cc source files. # Combine the cc source files.
compose_unity_target_sources( compose_unity_target_sources(${UNITY_TARGET} cc ${cc_srcs}
${UNITY_TARGET} ${mkldnn_cc_srcs} ${xpu_cc_srcs})
cc
${cc_srcs}
${mkldnn_cc_srcs}
${xpu_cc_srcs}
${npu_cc_srcs}
${mlu_cc_srcs})
if(TARGET ${UNITY_TARGET}) if(TARGET ${UNITY_TARGET})
# If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`. # If `UNITY_TARGET` exists, add source files to `UNITY_TARGET`.
target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources}) target_sources(${UNITY_TARGET} PRIVATE ${unity_target_cc_sources})
...@@ -329,8 +417,7 @@ function(op_library TARGET) ...@@ -329,8 +417,7 @@ function(op_library TARGET)
else() else()
cc_library( cc_library(
${TARGET} ${TARGET}
SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs} ${npu_cc_srcs} SRCS ${cc_srcs} ${mkldnn_cc_srcs} ${xpu_cc_srcs}
${mlu_cc_srcs}
DEPS ${op_library_DEPS} ${op_common_deps}) DEPS ${op_library_DEPS} ${op_common_deps})
endif() endif()
endif() endif()
...@@ -342,8 +429,6 @@ function(op_library TARGET) ...@@ -342,8 +429,6 @@ function(op_library TARGET)
list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len) list(LENGTH mkldnn_cc_srcs mkldnn_cc_srcs_len)
list(LENGTH xpu_cc_srcs xpu_cc_srcs_len) list(LENGTH xpu_cc_srcs xpu_cc_srcs_len)
list(LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len) list(LENGTH miopen_cu_cc_srcs miopen_cu_cc_srcs_len)
list(LENGTH npu_cc_srcs npu_cc_srcs_len)
list(LENGTH mlu_cc_srcs mlu_cc_srcs_len)
# Define operators that don't need pybind here. # Define operators that don't need pybind here.
foreach( foreach(
...@@ -371,6 +456,11 @@ function(op_library TARGET) ...@@ -371,6 +456,11 @@ function(op_library TARGET)
foreach(cc_src ${cc_srcs}) foreach(cc_src ${cc_srcs})
# pybind USE_OP_ITSELF # pybind USE_OP_ITSELF
set(op_name "") set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${cc_src} ${pybind_file} "PD_REGISTER_KERNEL")
find_phi_register(${cc_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
find_phi_register(${cc_src} ${pybind_file}
"PD_REGISTER_KERNEL_FOR_ALL_DTYPE")
find_register(${cc_src} "REGISTER_OPERATOR" op_name) find_register(${cc_src} "REGISTER_OPERATOR" op_name)
if(NOT ${op_name} EQUAL "") if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n") file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
...@@ -379,6 +469,17 @@ function(op_library TARGET) ...@@ -379,6 +469,17 @@ function(op_library TARGET)
set(pybind_flag 1) set(pybind_flag 1)
endif() endif()
# pybind USE_OP_ITSELF
set(op_name "")
# Add PHI Kernel Registry Message
find_register(${cc_src} "REGISTER_ACTIVATION_OP" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_ITSELF(${op_name});\n")
# hack: for example, the target in conv_transpose_op.cc is conv2d_transpose, used in mkldnn
set(TARGET ${op_name})
set(pybind_flag 1)
endif()
set(op_name "") set(op_name "")
find_register(${cc_src} "REGISTER_OP_WITHOUT_GRADIENT" op_name) find_register(${cc_src} "REGISTER_OP_WITHOUT_GRADIENT" op_name)
if(NOT ${op_name} EQUAL "") if(NOT ${op_name} EQUAL "")
...@@ -408,6 +509,11 @@ function(op_library TARGET) ...@@ -408,6 +509,11 @@ function(op_library TARGET)
# message("cu_srcs ${cu_srcs}") # message("cu_srcs ${cu_srcs}")
foreach(cu_src ${cu_srcs}) foreach(cu_src ${cu_srcs})
set(op_name "") set(op_name "")
# Add PHI Kernel Registry Message
find_phi_register(${cu_src} ${pybind_file} "PD_REGISTER_KERNEL")
find_phi_register(${cu_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
find_phi_register(${cu_src} ${pybind_file}
"PD_REGISTER_KERNEL_FOR_ALL_DTYPE")
find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name) find_register(${cu_src} "REGISTER_OP_CUDA_KERNEL" op_name)
if(NOT ${op_name} EQUAL "") if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
...@@ -421,6 +527,10 @@ function(op_library TARGET) ...@@ -421,6 +527,10 @@ function(op_library TARGET)
foreach(hip_src ${hip_srcs}) foreach(hip_src ${hip_srcs})
set(op_name "") set(op_name "")
find_register(${hip_src} "REGISTER_OP_CUDA_KERNEL" op_name) find_register(${hip_src} "REGISTER_OP_CUDA_KERNEL" op_name)
find_phi_register(${hip_src} ${pybind_file} "PD_REGISTER_KERNEL")
find_phi_register(${hip_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
find_phi_register(${hip_src} ${pybind_file}
"PD_REGISTER_KERNEL_FOR_ALL_DTYPE")
if(NOT ${op_name} EQUAL "") if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, CUDA);\n")
set(pybind_flag 1) set(pybind_flag 1)
...@@ -454,6 +564,7 @@ function(op_library TARGET) ...@@ -454,6 +564,7 @@ function(op_library TARGET)
foreach(xpu_src ${xpu_cc_srcs}) foreach(xpu_src ${xpu_cc_srcs})
set(op_name "") set(op_name "")
find_register(${xpu_src} "REGISTER_OP_XPU_KERNEL" op_name) find_register(${xpu_src} "REGISTER_OP_XPU_KERNEL" op_name)
find_phi_register(${xpu_src} ${pybind_file} "PD_REGISTER_STRUCT_KERNEL")
if(NOT ${op_name} EQUAL "") if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, XPU);\n")
set(pybind_flag 1) set(pybind_flag 1)
...@@ -474,6 +585,8 @@ function(op_library TARGET) ...@@ -474,6 +585,8 @@ function(op_library TARGET)
foreach(xpu_kp_src ${xpu_kp_cc_srcs}) foreach(xpu_kp_src ${xpu_kp_cc_srcs})
set(op_name "") set(op_name "")
find_register(${xpu_kp_src} "REGISTER_OP_KERNEL" op_name) find_register(${xpu_kp_src} "REGISTER_OP_KERNEL" op_name)
find_phi_register(${xpu_kp_src} ${pybind_file}
"PD_REGISTER_STRUCT_KERNEL")
if(NOT ${op_name} EQUAL "") if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, KP);\n") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, KP);\n")
message(STATUS "Building KP Target: ${op_name}") message(STATUS "Building KP Target: ${op_name}")
...@@ -482,56 +595,11 @@ function(op_library TARGET) ...@@ -482,56 +595,11 @@ function(op_library TARGET)
endforeach() endforeach()
endif() endif()
# pybind USE_OP_DEVICE_KERNEL for NPU
if(WITH_ASCEND_CL AND ${npu_cc_srcs_len} GREATER 0)
foreach(npu_src ${npu_cc_srcs})
set(op_name "")
find_register(${npu_src} "REGISTER_OP_NPU_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, NPU);\n")
set(pybind_flag 1)
endif()
endforeach()
endif()
# pybind USE_OP_DEVICE_KERNEL for MLU
if(WITH_MLU AND ${mlu_cc_srcs_len} GREATER 0)
foreach(mlu_src ${mlu_cc_srcs})
set(op_name "")
find_register(${mlu_src} "REGISTER_OP_MLU_KERNEL" op_name)
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, MLU);\n")
set(pybind_flag 1)
endif()
endforeach()
endif()
# pybind USE_OP_DEVICE_KERNEL for MKLDNN # pybind USE_OP_DEVICE_KERNEL for MKLDNN
if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0) if(WITH_MKLDNN AND ${mkldnn_cc_srcs_len} GREATER 0)
# Append first implemented MKLDNN activation operator # Append first implemented MKLDNN activation operator
if(${MKLDNN_FILE} STREQUAL "activation_mkldnn_op") if(${MKLDNN_FILE} STREQUAL "activation_mkldnn_op")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(softplus, MKLDNN);\n") file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(softplus, MKLDNN);\n")
elseif(${MKLDNN_FILE} STREQUAL "conv_mkldnn_op")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, FP32);\n")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, S8);\n")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(conv2d, MKLDNN, U8);\n")
elseif(${MKLDNN_FILE} STREQUAL "transpose_mkldnn_op")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, FP32);\n")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, S8);\n")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(transpose2, MKLDNN, U8);\n")
elseif(${MKLDNN_FILE} STREQUAL "fc_mkldnn_op")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, FP32);\n")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, S8);\n")
file(APPEND ${pybind_file}
"USE_OP_DEVICE_KERNEL_WITH_CUSTOM_TYPE(fc, MKLDNN, U8);\n")
else() else()
foreach(mkldnn_src ${mkldnn_cc_srcs}) foreach(mkldnn_src ${mkldnn_cc_srcs})
set(op_name "") set(op_name "")
...@@ -586,8 +654,6 @@ function(register_operators) ...@@ -586,8 +654,6 @@ function(register_operators)
"*_op.cc") "*_op.cc")
string(REPLACE "_mkldnn" "" OPS "${OPS}") string(REPLACE "_mkldnn" "" OPS "${OPS}")
string(REPLACE "_xpu" "" OPS "${OPS}") string(REPLACE "_xpu" "" OPS "${OPS}")
string(REPLACE "_npu" "" OPS "${OPS}")
string(REPLACE "_mlu" "" OPS "${OPS}")
string(REPLACE ".cc" "" OPS "${OPS}") string(REPLACE ".cc" "" OPS "${OPS}")
list(REMOVE_DUPLICATES OPS) list(REMOVE_DUPLICATES OPS)
list(LENGTH register_operators_DEPS register_operators_DEPS_len) list(LENGTH register_operators_DEPS register_operators_DEPS_len)
......
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# make package for paddle shared library
set(PADDLE_INSTALL_DIR ${PADDLE_BINARY_DIR}/paddle_install_dir)
set(PADDLE_LIB_TEST_DIR ${PADDLE_BINARY_DIR}/test/paddle_lib)
configure_file(${PADDLE_SOURCE_DIR}/cmake/PaddleConfig.cmake.in
${PADDLE_INSTALL_DIR}/cmake/PaddleConfig.cmake @ONLY)
configure_file(${PADDLE_SOURCE_DIR}/test/paddle_lib/CMakeLists.txt.in
${PADDLE_BINARY_DIR}/test/paddle_lib/CMakeLists.txt @ONLY)
version(${PADDLE_INSTALL_DIR}/version.txt)
...@@ -64,6 +64,11 @@ function(generate_unify_header DIR_NAME) ...@@ -64,6 +64,11 @@ function(generate_unify_header DIR_NAME)
endif() endif()
endif() endif()
endforeach() endforeach()
if(DEFINED REDUCE_INFERENCE_LIB_SIZE)
if(${kernel_name} MATCHES ".*_grad")
continue()
endif()
endif()
# append header into extension.h # append header into extension.h
string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header_file "${header_file}") string(REPLACE "${PADDLE_SOURCE_DIR}\/" "" header_file "${header_file}")
file(APPEND ${phi_extension_header_file} "#include \"${header_file}\"\n") file(APPEND ${phi_extension_header_file} "#include \"${header_file}\"\n")
...@@ -78,52 +83,121 @@ function(kernel_declare TARGET_LIST) ...@@ -78,52 +83,121 @@ function(kernel_declare TARGET_LIST)
string( string(
REGEX REGEX
MATCH MATCH
"(PD_REGISTER_KERNEL|PD_REGISTER_GENERAL_KERNEL)\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z]*,[ \\\t\r\n]*[A-Z_]*" "(PD_REGISTER_KERNEL|PD_REGISTER_KERNEL_FOR_ALL_DTYPE|PD_REGISTER_KERNEL_FOR_ALL_BACKEND_DTYPE|PD_REGISTER_KERNEL_FOR_ALL_BACKEND_DTYPE_EXCEPT_CUSTOM)\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z_]*,[ \\\t\r\n]*[A-Z_]*"
first_registry first_registry
"${kernel_impl}") "${kernel_impl}")
if(NOT first_registry STREQUAL "") set(kernel_declare_id "")
while(NOT first_registry STREQUAL "")
string(REPLACE "${first_registry}" "" kernel_impl "${kernel_impl}")
# some gpu kernel can run on cuda, but not support jetson, so we add this branch
if(WITH_NV_JETSON)
string(FIND "${first_registry}" "decode_jpeg" pos)
if(pos GREATER 1)
set(first_registry "")
endif()
endif()
# fusion group kernel is not supported in windows and mac
if(WIN32 OR APPLE)
string(FIND "${first_registry}" "fusion_group" pos)
if(pos GREATER 1)
set(first_registry "")
endif()
endif()
# some gpu kernel only can run on cuda, not support rocm, so we add this branch # some gpu kernel only can run on cuda, not support rocm, so we add this branch
if(WITH_ROCM OR WITH_NV_JETSON) if(WITH_ROCM)
string(FIND "${first_registry}" "cuda_only" pos) string(FIND "${first_registry}" "cuda_only" pos)
if(pos GREATER 1) if(pos GREATER 1)
continue() set(first_registry "")
endif()
endif()
if(NOT first_registry STREQUAL "")
string(
REGEX
MATCH
"(PD_REGISTER_KERNEL_FOR_ALL_BACKEND_DTYPE_EXCEPT_CUSTOM)\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z_]*,[ \\\t\r\n]*[A-Z_]*"
is_all_backend
"${first_registry}")
if(NOT is_all_backend STREQUAL "")
# parse the registerd kernel message
string(
REPLACE "PD_REGISTER_KERNEL_FOR_ALL_BACKEND_DTYPE_EXCEPT_CUSTOM("
"" kernel_msg "${first_registry}")
else()
string(
REGEX
MATCH
"(PD_REGISTER_KERNEL_FOR_ALL_BACKEND_DTYPE)\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z_]*,[ \\\t\r\n]*[A-Z_]*"
is_all_backend
"${first_registry}")
# parse the registerd kernel message
string(REPLACE "PD_REGISTER_KERNEL_FOR_ALL_BACKEND_DTYPE(" ""
kernel_msg "${first_registry}")
endif()
string(REPLACE "PD_REGISTER_KERNEL(" "" kernel_msg "${kernel_msg}")
string(REPLACE "PD_REGISTER_KERNEL_FOR_ALL_DTYPE(" "" kernel_msg
"${kernel_msg}")
string(REPLACE "," ";" kernel_msg "${kernel_msg}")
string(REGEX REPLACE "[ \\\t\r\n]+" "" kernel_msg "${kernel_msg}")
string(REGEX REPLACE "//cuda_only" "" kernel_msg "${kernel_msg}")
list(GET kernel_msg 0 kernel_name)
if(NOT is_all_backend STREQUAL "")
list(GET kernel_msg 1 kernel_layout)
set(kernel_backend "CPU")
else()
list(GET kernel_msg 1 kernel_backend)
list(GET kernel_msg 2 kernel_layout)
endif()
set(kernel_declare_id
"${kernel_declare_id}PD_DECLARE_KERNEL(${kernel_name}, ${kernel_backend}, ${kernel_layout});"
)
if("${KERNEL_LIST}" STREQUAL "")
set(first_registry "")
else()
string(
REGEX
MATCH
"(PD_REGISTER_KERNEL|PD_REGISTER_KERNEL_FOR_ALL_DTYPE|PD_REGISTER_KERNEL_FOR_ALL_BACKEND_DTYPE|PD_REGISTER_KERNEL_FOR_ALL_BACKEND_DTYPE_EXCEPT_CUSTOM)\\([ \t\r\n]*[a-z0-9_]*,[[ \\\t\r\n\/]*[a-z0-9_]*]?[ \\\t\r\n]*[a-zA-Z_]*,[ \\\t\r\n]*[A-Z_]*"
first_registry
"${kernel_impl}")
endif() endif()
endif() endif()
# parse the registerd kernel message endwhile()
string(REPLACE "PD_REGISTER_KERNEL(" "" kernel_msg "${first_registry}") # append kernel declare into declarations.h
string(REPLACE "PD_REGISTER_GENERAL_KERNEL(" "" kernel_msg if(NOT kernel_declare_id STREQUAL "")
"${kernel_msg}") file(APPEND ${kernel_declare_file} "${kernel_declare_id}\n")
string(REPLACE "," ";" kernel_msg "${kernel_msg}")
string(REGEX REPLACE "[ \\\t\r\n]+" "" kernel_msg "${kernel_msg}")
string(REGEX REPLACE "//cuda_only" "" kernel_msg "${kernel_msg}")
list(GET kernel_msg 0 kernel_name)
list(GET kernel_msg 1 kernel_backend)
list(GET kernel_msg 2 kernel_layout)
# append kernel declare into declarations.h
file(
APPEND ${kernel_declare_file}
"PD_DECLARE_KERNEL(${kernel_name}, ${kernel_backend}, ${kernel_layout});\n"
)
endif() endif()
endforeach() endforeach()
endfunction() endfunction()
function(append_op_util_declare TARGET) function(append_op_util_declare TARGET)
file(READ ${CMAKE_CURRENT_SOURCE_DIR}/${TARGET} target_content) file(READ ${TARGET} target_content)
string(REGEX MATCH "(PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*"
util_registrar "${target_content}")
if(NOT ${util_registrar} EQUAL "")
string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN"
util_declare "${util_registrar}")
string(APPEND util_declare ");\n")
file(APPEND ${op_utils_header} "${util_declare}")
endif()
endfunction()
function(append_op_kernel_map_declare TARGET)
file(READ ${TARGET} target_content)
string( string(
REGEX REGEX
MATCH MATCH
"(PD_REGISTER_BASE_KERNEL_NAME|PD_REGISTER_ARG_MAPPING_FN)\\([ \t\r\n]*[a-z0-9_]*" "(PD_REGISTER_BASE_KERNEL_NAME)\\([ \t\r\n]*[a-z0-9_]*,[ \\\t\r\n]*[a-z0-9_]*"
util_registrar kernel_mapping_registrar
"${target_content}") "${target_content}")
string(REPLACE "PD_REGISTER_ARG_MAPPING_FN" "PD_DECLARE_ARG_MAPPING_FN" if(NOT ${kernel_mapping_registrar} EQUAL "")
util_declare "${util_registrar}") string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME"
string(REPLACE "PD_REGISTER_BASE_KERNEL_NAME" "PD_DECLARE_BASE_KERNEL_NAME" kernel_mapping_declare "${kernel_mapping_registrar}")
util_declare "${util_declare}") string(APPEND kernel_mapping_declare ");\n")
string(APPEND util_declare ");\n") file(APPEND ${op_utils_header} "${kernel_mapping_declare}")
file(APPEND ${op_utils_header} "${util_declare}") endif()
endfunction() endfunction()
function(register_op_utils TARGET_NAME) function(register_op_utils TARGET_NAME)
...@@ -134,13 +208,11 @@ function(register_op_utils TARGET_NAME) ...@@ -134,13 +208,11 @@ function(register_op_utils TARGET_NAME)
cmake_parse_arguments(register_op_utils "${options}" "${oneValueArgs}" cmake_parse_arguments(register_op_utils "${options}" "${oneValueArgs}"
"${multiValueArgs}" ${ARGN}) "${multiValueArgs}" ${ARGN})
file( file(GLOB SIGNATURES "${PADDLE_SOURCE_DIR}/paddle/phi/ops/compat/*_sig.cc")
GLOB SIGNATURES
RELATIVE "${CMAKE_CURRENT_SOURCE_DIR}"
"*_sig.cc")
foreach(target ${SIGNATURES}) foreach(target ${SIGNATURES})
append_op_util_declare(${target}) append_op_util_declare(${target})
list(APPEND utils_srcs ${CMAKE_CURRENT_SOURCE_DIR}/${target}) append_op_kernel_map_declare(${target})
list(APPEND utils_srcs ${target})
endforeach() endforeach()
cc_library( cc_library(
...@@ -160,18 +232,30 @@ function(prune_declaration_h) ...@@ -160,18 +232,30 @@ function(prune_declaration_h)
file(APPEND ${kernel_declare_file_prune} file(APPEND ${kernel_declare_file_prune}
"#include \"paddle/phi/core/kernel_registry.h\"\n") "#include \"paddle/phi/core/kernel_registry.h\"\n")
set(kernel_declare_list_prune)
foreach(kernel_registry IN LISTS kernel_registry_list) foreach(kernel_registry IN LISTS kernel_registry_list)
if(NOT ${kernel_registry} EQUAL "") if(NOT "${kernel_registry}" EQUAL "")
foreach(kernel_name IN LISTS kernel_list) foreach(kernel_name IN LISTS kernel_list)
string(FIND ${kernel_registry} "(${kernel_name})" index1) string(FIND "${kernel_registry}" "(${kernel_name})" index1)
string(FIND ${kernel_registry} "(${kernel_name}," index2) string(FIND "${kernel_registry}" "(${kernel_name}," index2)
if((NOT ${index1} EQUAL "-1") OR (NOT ${index2} EQUAL "-1")) if((NOT ${index1} EQUAL "-1") OR (NOT ${index2} EQUAL "-1"))
file(APPEND ${kernel_declare_file_prune} "${kernel_registry}\n") string(
REGEX
MATCH
"PD_DECLARE_KERNEL\\([a-z0-9_]*, [[a-z0-9_]*]?[a-zA-Z_]*, [A-Z_]*\\)"
first_registry
"${kernel_registry}")
list(APPEND kernel_declare_list_prune "${first_registry}")
endif() endif()
endforeach() endforeach()
endif() endif()
endforeach() endforeach()
list(REMOVE_DUPLICATES kernel_declare_list_prune)
foreach(kernel_declare_prune IN LISTS kernel_declare_list_prune)
file(APPEND ${kernel_declare_file_prune} "${kernel_declare_prune};\n")
endforeach()
file(WRITE ${kernel_declare_file} "") file(WRITE ${kernel_declare_file} "")
file(STRINGS ${kernel_declare_file_prune} kernel_registry_list_tmp) file(STRINGS ${kernel_declare_file_prune} kernel_registry_list_tmp)
foreach(kernel_registry IN LISTS kernel_registry_list_tmp) foreach(kernel_registry IN LISTS kernel_registry_list_tmp)
...@@ -180,3 +264,27 @@ function(prune_declaration_h) ...@@ -180,3 +264,27 @@ function(prune_declaration_h)
endif() endif()
endforeach() endforeach()
endfunction() endfunction()
# Record source files that live under the current source directory in a
# cache-backed list.
#
# Usage:
#   collect_srcs(<SRC_GROUP> SRCS <file>...)
#
#   SRC_GROUP - name of the INTERNAL cache variable that accumulates entries.
#   SRCS      - file names; each one is stored as
#               ${CMAKE_CURRENT_SOURCE_DIR}/<file>.
#
# Every entry is appended as ";<path>" to the current value of the group.
function(collect_srcs SRC_GROUP)
  cmake_parse_arguments(collect "" "" "SRCS" ${ARGN})
  foreach(rel_path IN LISTS collect_SRCS)
    # Re-read the group on each pass so successive entries chain together.
    set(${SRC_GROUP}
        "${${SRC_GROUP}};${CMAKE_CURRENT_SOURCE_DIR}/${rel_path}"
        CACHE INTERNAL "")
  endforeach()
endfunction()
# Record source files in a cache-backed list, storing each path exactly as
# given (no directory prefix is added).
#
# Usage:
#   collect_generated_srcs(<SRC_GROUP> SRCS <path>...)
#
#   SRC_GROUP - name of the INTERNAL cache variable that accumulates entries.
#   SRCS      - paths to append, stored verbatim.
#
# Every entry is appended as ";<path>" to the current value of the group.
function(collect_generated_srcs SRC_GROUP)
  cmake_parse_arguments(collect "" "" "SRCS" ${ARGN})
  foreach(src_path IN LISTS collect_SRCS)
    # Re-read the group on each pass so successive entries chain together.
    set(${SRC_GROUP}
        "${${SRC_GROUP}};${src_path}"
        CACHE INTERNAL "")
  endforeach()
endfunction()
...@@ -17,37 +17,36 @@ set(PADDLE_INFERENCE_INSTALL_DIR ...@@ -17,37 +17,36 @@ set(PADDLE_INFERENCE_INSTALL_DIR
function(phi_header_path_compat TARGET_PATH) function(phi_header_path_compat TARGET_PATH)
message(STATUS "phi header path compat processing: ${TARGET_PATH}") message(STATUS "phi header path compat processing: ${TARGET_PATH}")
string(FIND ${TARGET_PATH} "experimental" pos) file(GLOB HEADERS "${TARGET_PATH}/*" "*.h")
if(pos GREATER 1) foreach(header ${HEADERS})
file(GLOB HEADERS "${TARGET_PATH}/*" "*.h") if(${header} MATCHES ".*.h$")
foreach(header ${HEADERS}) file(READ ${header} HEADER_CONTENT)
if(${header} MATCHES ".*.h$") string(REPLACE "paddle/fluid/platform/" "paddle/phi/" HEADER_CONTENT
file(READ ${header} HEADER_CONTENT) "${HEADER_CONTENT}")
string(REPLACE "paddle/phi/" "paddle/include/experimental/phi/" file(WRITE ${header} "${HEADER_CONTENT}")
HEADER_CONTENT "${HEADER_CONTENT}") message(STATUS "phi header path compat processing complete: ${header}")
string(REPLACE "paddle/utils/" "paddle/include/experimental/utils/" endif()
HEADER_CONTENT "${HEADER_CONTENT}") endforeach()
file(WRITE ${header} "${HEADER_CONTENT}")
message(STATUS "phi header path compat processing complete: ${header}")
endif()
endforeach()
endif()
endfunction() endfunction()
phi_header_path_compat(${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle)
phi_header_path_compat( phi_header_path_compat(
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental) ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi)
phi_header_path_compat( phi_header_path_compat(
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api) ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api)
phi_header_path_compat( phi_header_path_compat(
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/ext) ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api/ext)
phi_header_path_compat( phi_header_path_compat(
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/api/include) ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/api/include)
phi_header_path_compat( phi_header_path_compat(
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/common) ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/common)
phi_header_path_compat( phi_header_path_compat(
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/phi/core) ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/phi/core)
# In order to be compatible with the original behavior, the header file name needs to be changed # NOTE(liuyuanle): In inference lib, no need include paddle/utils/pybind.h, so we delete this.
file(RENAME file(READ ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/extension.h
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/extension.h HEADER_CONTENT)
${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/experimental/ext_all.h) string(REGEX REPLACE "#if !defined\\(PADDLE_ON_INFERENCE\\).*#endif" ""
HEADER_CONTENT "${HEADER_CONTENT}")
file(WRITE ${PADDLE_INFERENCE_INSTALL_DIR}/paddle/include/paddle/extension.h
"${HEADER_CONTENT}")
...@@ -48,3 +48,20 @@ function(find_python_module module) ...@@ -48,3 +48,20 @@ function(find_python_module module)
${PY_${module_upper}_VERSION} ${PY_${module_upper}_VERSION}
PARENT_SCOPE) PARENT_SCOPE)
endfunction() endfunction()
# Validate that a Python version string names a version Paddle supports.
#
# Arguments:
#   py_version - dotted version string with at least "major.minor"
#                components, e.g. "3.7" or "3.10.4".
#
# Stops configuration with FATAL_ERROR when fewer than two dot-separated
# components are supplied, or when the version is older than 3.7.
function(check_py_version py_version)
  # Quote the expansion so an empty argument reaches the length check below
  # instead of failing inside string(REPLACE) with an argument-count error.
  string(REPLACE "." ";" version_list "${py_version}")
  list(LENGTH version_list version_list_len)
  if(version_list_len LESS 2)
    message(FATAL_ERROR "Please input Python version, eg:3.7 or 3.8 and so on")
  endif()
  # Compare the version as a whole. Checking major and minor independently
  # would reject a newer major release with a small minor number (e.g. 4.0).
  if("${py_version}" VERSION_LESS "3.7")
    message(FATAL_ERROR "Paddle only support Python version >=3.7 now!")
  endif()
endfunction()
...@@ -29,7 +29,21 @@ set(third_party_deps) ...@@ -29,7 +29,21 @@ set(third_party_deps)
include(ProcessorCount) include(ProcessorCount)
ProcessorCount(NPROC) ProcessorCount(NPROC)
# Fetch the git submodules at configure time, unless WITH_SETUP_INSTALL is
# enabled, in which case this whole step is skipped.
if(NOT WITH_SETUP_INSTALL)
# NOTE(risemeup1): Initialize any submodules.
message(
STATUS
"Check submodules of paddle, and run 'git submodule update --init --recursive'"
)
# `git` is invoked by bare name from ${PADDLE_SOURCE_DIR}; its exit status is
# captured in result_var.
execute_process(
COMMAND git submodule update --init --recursive
WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
RESULT_VARIABLE result_var)
# Any non-zero result aborts configuration. The message mentions the network,
# but every failure of the command ends up here.
if(NOT result_var EQUAL 0)
message(FATAL_ERROR "Failed to get submodule, please check your network !")
endif()
endif()
# cache funciton to avoid repeat download code of third_party. # cache funciton to avoid repeat download code of third_party.
# This function has 4 parameters, URL / REPOSITOR / TAG / DIR: # This function has 4 parameters, URL / REPOSITOR / TAG / DIR:
# 1. URL: specify download url of 3rd party # 1. URL: specify download url of 3rd party
...@@ -245,15 +259,54 @@ if(${CMAKE_VERSION} VERSION_GREATER "3.5.2") ...@@ -245,15 +259,54 @@ if(${CMAKE_VERSION} VERSION_GREATER "3.5.2")
)# adds --depth=1 arg to git clone of External_Projects )# adds --depth=1 arg to git clone of External_Projects
endif() endif()
########################### include third_party according to flags ###############################
include(external/zlib) # download, build, install zlib include(external/zlib) # download, build, install zlib
include(external/gflags) # download, build, install gflags include(external/gflags) # download, build, install gflags
include(external/glog) # download, build, install glog include(external/glog) # download, build, install glog
########################### include third_party according to flags ###############################
if(WITH_CINN)
if(WITH_MKL)
add_definitions(-DCINN_WITH_MKL_CBLAS)
endif()
if(WITH_MKLDNN)
add_definitions(-DCINN_WITH_DNNL)
endif()
include(cmake/cinn/version.cmake)
if(NOT EXISTS ${CMAKE_BINARY_DIR}/cmake/cinn/config.cmake)
file(COPY ${PROJECT_SOURCE_DIR}/cmake/cinn/config.cmake
DESTINATION ${CMAKE_BINARY_DIR}/cmake/cinn)
endif()
include(${CMAKE_BINARY_DIR}/cmake/cinn/config.cmake)
include(cmake/cinn/external/absl.cmake)
include(cmake/cinn/external/llvm.cmake)
include(cmake/cinn/external/isl.cmake)
include(cmake/cinn/external/ginac.cmake)
include(cmake/cinn/external/openmp.cmake)
include(cmake/cinn/external/jitify.cmake)
endif()
# cinn_only includes third-party libraries separately
if(CINN_ONLY)
include(external/gtest)
include(external/protobuf)
if(WITH_PYTHON)
include(external/pybind11)
endif()
if(WITH_MKL)
include(external/mklml)
endif()
if(WITH_MKLDNN)
include(external/mkldnn)
endif()
return()
endif()
include(external/eigen) # download eigen3 include(external/eigen) # download eigen3
include(external/threadpool) # download threadpool include(external/threadpool) # download threadpool
include(external/dlpack) # download dlpack include(external/dlpack) # download dlpack
include(external/xxhash) # download, build, install xxhash include(external/xxhash) # download, build, install xxhash
include(external/warpctc) # download, build, install warpctc include(external/warpctc) # download, build, install warpctc
include(external/warprnnt) # download, build, install warprnnt
include(external/utf8proc) # download, build, install utf8proc include(external/utf8proc) # download, build, install utf8proc
list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog list(APPEND third_party_deps extern_eigen3 extern_gflags extern_glog
...@@ -264,6 +317,7 @@ list( ...@@ -264,6 +317,7 @@ list(
extern_zlib extern_zlib
extern_dlpack extern_dlpack
extern_warpctc extern_warpctc
extern_warprnnt
extern_threadpool extern_threadpool
extern_utf8proc) extern_utf8proc)
include(external/lapack) # download, build, install lapack include(external/lapack) # download, build, install lapack
...@@ -276,6 +330,7 @@ list( ...@@ -276,6 +330,7 @@ list(
extern_zlib extern_zlib
extern_dlpack extern_dlpack
extern_warpctc extern_warpctc
extern_warprnnt
extern_threadpool extern_threadpool
extern_lapack) extern_lapack)
...@@ -298,9 +353,9 @@ if(TARGET extern_protobuf) ...@@ -298,9 +353,9 @@ if(TARGET extern_protobuf)
list(APPEND third_party_deps extern_protobuf) list(APPEND third_party_deps extern_protobuf)
endif() endif()
if(WITH_PYTHON) if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
include(external/python) # find python and python_module include(external/python) # find python and python_module
include(external/pybind11) # download pybind11 include(external/pybind11) # prepare submodule pybind11
list(APPEND third_party_deps extern_pybind) list(APPEND third_party_deps extern_pybind)
endif() endif()
...@@ -328,23 +383,21 @@ if(WITH_GPU) ...@@ -328,23 +383,21 @@ if(WITH_GPU)
${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa ${URL} "externalError" MD5 a712a49384e77ca216ad866712f7cafa
)# download file externalErrorMsg.tar.gz )# download file externalErrorMsg.tar.gz
if(WITH_TESTING) if(WITH_TESTING)
# copy externalErrorMsg.pb, just for unittest can get error message correctly. # copy externalErrorMsg.pb for UnitTest
set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data) set(SRC_DIR ${THIRD_PARTY_PATH}/externalError/data)
if(WIN32 AND (NOT "${CMAKE_GENERATOR}" STREQUAL "Ninja")) # for python UT 'test_exception.py'
set(DST_DIR1 set(DST_DIR1
${CMAKE_BINARY_DIR}/paddle/fluid/third_party/externalError/data)
else()
set(DST_DIR1 ${CMAKE_BINARY_DIR}/paddle/third_party/externalError/data)
endif()
set(DST_DIR2
${CMAKE_BINARY_DIR}/python/paddle/include/third_party/externalError/data ${CMAKE_BINARY_DIR}/python/paddle/include/third_party/externalError/data
) )
# for C++ UT 'enforce_test'
set(DST_DIR2 ${CMAKE_BINARY_DIR}/paddle/third_party/externalError/data)
add_custom_command( add_custom_command(
TARGET download_externalError TARGET download_externalError
POST_BUILD POST_BUILD
COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR1} COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR1}
COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR2} COMMAND ${CMAKE_COMMAND} -E copy_directory ${SRC_DIR} ${DST_DIR2}
COMMENT "copy_directory from ${SRC_DIR} to ${DST_DIR}") COMMENT "copy_directory from ${SRC_DIR} to ${DST_DIR1}"
COMMENT "copy_directory from ${SRC_DIR} to ${DST_DIR2}")
endif() endif()
endif() endif()
...@@ -353,11 +406,6 @@ if(WITH_XPU) ...@@ -353,11 +406,6 @@ if(WITH_XPU)
list(APPEND third_party_deps extern_xpu) list(APPEND third_party_deps extern_xpu)
endif() endif()
if(WITH_MLU)
include(external/concurrentqueue) # download, build, install concurrentqueue
list(APPEND third_party_deps extern_concurrentqueue)
endif()
if(WITH_PSLIB) if(WITH_PSLIB)
include(external/pslib) # download, build, install pslib include(external/pslib) # download, build, install pslib
list(APPEND third_party_deps extern_pslib) list(APPEND third_party_deps extern_pslib)
...@@ -391,16 +439,6 @@ if(WITH_BOX_PS) ...@@ -391,16 +439,6 @@ if(WITH_BOX_PS)
list(APPEND third_party_deps extern_box_ps) list(APPEND third_party_deps extern_box_ps)
endif() endif()
if(WITH_ASCEND OR WITH_ASCEND_CL)
include(external/ascend)
if(WITH_ASCEND OR WITH_ASCEND_CL)
list(APPEND third_party_deps extern_ascend)
endif()
if(WITH_ASCEND_CL)
list(APPEND third_party_deps extern_ascend_cl)
endif()
endif()
if(WITH_PSCORE) if(WITH_PSCORE)
include(external/snappy) include(external/snappy)
list(APPEND third_party_deps extern_snappy) list(APPEND third_party_deps extern_snappy)
...@@ -421,10 +459,39 @@ if(WITH_PSCORE) ...@@ -421,10 +459,39 @@ if(WITH_PSCORE)
include(external/rocksdb) # download, build, install rocksdb include(external/rocksdb) # download, build, install rocksdb
list(APPEND third_party_deps extern_rocksdb) list(APPEND third_party_deps extern_rocksdb)
include(external/jemalloc) # download, build, install jemalloc
list(APPEND third_party_deps extern_jemalloc)
endif()
if(WITH_RPC
AND NOT WITH_PSCORE
AND NOT WITH_PSLIB)
include(external/snappy)
list(APPEND third_party_deps extern_snappy)
include(external/leveldb)
list(APPEND third_party_deps extern_leveldb)
include(external/brpc)
list(APPEND third_party_deps extern_brpc)
endif()
if(WITH_DISTRIBUTE
AND NOT WITH_PSLIB
AND NOT WITH_PSCORE
AND NOT WITH_RPC)
include(external/snappy)
list(APPEND third_party_deps extern_snappy)
include(external/leveldb)
list(APPEND third_party_deps extern_leveldb)
include(external/brpc)
list(APPEND third_party_deps extern_brpc)
endif() endif()
if(WITH_XBYAK) if(WITH_XBYAK)
include(external/xbyak) # download, build, install xbyak include(external/xbyak) # prepare submodule xbyak
list(APPEND third_party_deps extern_xbyak) list(APPEND third_party_deps extern_xbyak)
endif() endif()
...@@ -445,20 +512,6 @@ if(WITH_LITE) ...@@ -445,20 +512,6 @@ if(WITH_LITE)
include(external/lite) include(external/lite)
endif() endif()
if(WITH_CINN)
message(STATUS "Compile Paddle with CINN.")
include(external/cinn)
add_definitions(-DPADDLE_WITH_CINN)
if(WITH_GPU)
add_definitions(-DCINN_WITH_CUDA)
add_definitions(-DCINN_WITH_CUDNN)
endif()
if(WITH_MKL)
add_definitions(-DCINN_WITH_MKL_CBLAS)
add_definitions(-DCINN_WITH_MKLDNN)
endif()
endif()
if(WITH_CRYPTO) if(WITH_CRYPTO)
include(external/cryptopp) # download, build, install cryptopp include(external/cryptopp) # download, build, install cryptopp
list(APPEND third_party_deps extern_cryptopp) list(APPEND third_party_deps extern_cryptopp)
...@@ -476,11 +529,6 @@ if(WIN32) ...@@ -476,11 +529,6 @@ if(WIN32)
list(APPEND third_party_deps extern_dirent) list(APPEND third_party_deps extern_dirent)
endif() endif()
if(WITH_INFRT)
include(external/llvm)
list(APPEND third_party_deps ${llvm_libs})
endif()
if(WITH_IPU) if(WITH_IPU)
include(external/poplar) include(external/poplar)
list(APPEND third_party_deps extern_poplar) list(APPEND third_party_deps extern_poplar)
...@@ -498,7 +546,23 @@ if(WITH_GPU ...@@ -498,7 +546,23 @@ if(WITH_GPU
if(${CMAKE_CUDA_COMPILER_VERSION} GREATER_EQUAL 11.0) if(${CMAKE_CUDA_COMPILER_VERSION} GREATER_EQUAL 11.0)
include(external/cutlass) # download, build, install cusparselt include(external/cutlass) # download, build, install cusparselt
list(APPEND third_party_deps extern_cutlass) list(APPEND third_party_deps extern_cutlass)
set(WITH_CUTLASS ON)
endif()
if(${CMAKE_CUDA_COMPILER_VERSION} GREATER_EQUAL 11.4)
foreach(arch ${NVCC_ARCH_BIN})
if(${arch} GREATER_EQUAL 80)
include(external/flashattn)
list(APPEND third_party_deps extern_flashattn)
set(WITH_FLASHATTN ON)
break()
endif()
endforeach()
endif() endif()
endif() endif()
if(WITH_CUDNN_FRONTEND)
include(external/cudnn-frontend) # download cudnn-frontend
list(APPEND third_party_deps extern_cudnn_frontend)
endif()
add_custom_target(third_party ALL DEPENDS ${third_party_deps}) add_custom_target(third_party ALL DEPENDS ${third_party_deps})
...@@ -71,3 +71,46 @@ math(EXPR PADDLE_VERSION_INTEGER "${PADDLE_MAJOR_VER} * 1000000 ...@@ -71,3 +71,46 @@ math(EXPR PADDLE_VERSION_INTEGER "${PADDLE_MAJOR_VER} * 1000000
add_definitions(-DPADDLE_VERSION=${PADDLE_VERSION}) add_definitions(-DPADDLE_VERSION=${PADDLE_VERSION})
add_definitions(-DPADDLE_VERSION_INTEGER=${PADDLE_VERSION_INTEGER}) add_definitions(-DPADDLE_VERSION_INTEGER=${PADDLE_VERSION_INTEGER})
message(STATUS "Paddle version is ${PADDLE_VERSION}") message(STATUS "Paddle version is ${PADDLE_VERSION}")
# Write a plain-text summary of this build's configuration.
#
# Arguments:
#   version_file - path of the file to create; existing content is replaced.
#
# The summary lists the Paddle version, the commit hash printed by `git log`
# run in ${PADDLE_SOURCE_DIR}, the WITH_MKL/WITH_MKLDNN/WITH_GPU/WITH_ROCM/
# WITH_IPU switches, the C++ compiler version, and extra version lines for
# each of GPU, ROCm, IPU, TensorRT and Lite that is enabled.
function(version version_file)
  execute_process(
    COMMAND ${GIT_EXECUTABLE} log --pretty=format:%H -1
    WORKING_DIRECTORY ${PADDLE_SOURCE_DIR}
    OUTPUT_VARIABLE PADDLE_GIT_COMMIT)

  # Build the whole report in memory, then write it out in a single step.
  set(report "")
  string(APPEND report
         "Paddle version: ${PADDLE_VERSION}\n"
         "GIT COMMIT ID: ${PADDLE_GIT_COMMIT}\n"
         "WITH_MKL: ${WITH_MKL}\n"
         "WITH_MKLDNN: ${WITH_MKLDNN}\n"
         "WITH_GPU: ${WITH_GPU}\n"
         "WITH_ROCM: ${WITH_ROCM}\n"
         "WITH_IPU: ${WITH_IPU}\n")

  # Backend-specific toolchain versions, only for enabled backends.
  if(WITH_GPU)
    string(APPEND report
           "CUDA version: ${CUDA_VERSION}\n"
           "CUDNN version: v${CUDNN_MAJOR_VERSION}.${CUDNN_MINOR_VERSION}\n")
  endif()
  if(WITH_ROCM)
    string(APPEND report
           "HIP version: v${HIP_MAJOR_VERSION}.${HIP_MINOR_VERSION}\n"
           "MIOpen version: v${MIOPEN_MAJOR_VERSION}.${MIOPEN_MINOR_VERSION}\n")
  endif()
  if(WITH_IPU)
    string(APPEND report "PopART version: ${POPART_VERSION}\n")
  endif()

  string(APPEND report "CXX compiler version: ${CMAKE_CXX_COMPILER_VERSION}\n")

  # Optional inference integrations.
  if(TENSORRT_FOUND)
    string(APPEND report
           "WITH_TENSORRT: ${TENSORRT_FOUND}\n"
           "TensorRT version: v${TENSORRT_MAJOR_VERSION}.${TENSORRT_MINOR_VERSION}.${TENSORRT_PATCH_VERSION}.${TENSORRT_BUILD_VERSION}\n")
  endif()
  if(WITH_LITE)
    string(APPEND report
           "WITH_LITE: ${WITH_LITE}\n"
           "LITE_GIT_TAG: ${LITE_GIT_TAG}\n")
  endif()

  file(WRITE ${version_file} "${report}")
endfunction()
# For Readers and Developers # For Readers and Developers
Thanks for reading PaddlePaddle documentation. Thanks for reading PaddlePaddle documentation.
Since **September 17th, 2018**, the **0.15.0 and develop** documentation source has been moved to [FluidDoc Repo](https://github.com/PaddlePaddle/FluidDoc) and updated there. Since **September 17th, 2018**, the **0.15.0 and develop** documentation source has been moved to [FluidDoc Repo](https://github.com/PaddlePaddle/FluidDoc) and updated there.
......
...@@ -20,7 +20,7 @@ PORT_LOCK_FILE=/tmp/paddle_test_ports.lock ...@@ -20,7 +20,7 @@ PORT_LOCK_FILE=/tmp/paddle_test_ports.lock
touch $PORT_FILE $PORT_LOCK_FILE 2>/dev/null touch $PORT_FILE $PORT_LOCK_FILE 2>/dev/null
chmod a+rw $PORT_FILE $PORT_LOCK_FILE 2>/dev/null chmod a+rw $PORT_FILE $PORT_LOCK_FILE 2>/dev/null
# acquire a range of ports that not used by other runtests.sh currentlly. # acquire a range of ports that not used by other runtests.sh currently.
# return 1 if ports is used by other, otherwise return 0. # return 1 if ports is used by other, otherwise return 0.
# NOTE: the acquire_ports/release_ports is interprocess mutexed. # NOTE: the acquire_ports/release_ports is interprocess mutexed.
# #
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment