"git@developer.sourcefind.cn:wangsen/mineru.git" did not exist on "fb6db2d9cf5e21b257012a3b748c28053ecb57df"
Commit a715222c authored by yuguo's avatar yuguo
Browse files

0.9.1-rocm

parent f262efc9
# Changelog for OneFlow v0.8.0
## v0.8.0-dev
### 1. Enhancements
#### Eager
- placeholder
#### System
- placeholder
#### Build
### 2. Bug fixes
#### Placeholder
### 3. Deprecations
#### Single client
## v0.7.0
The CHANGELOG for v0.7.0 releases can be found [in the v0.7.0 tag](https://github.com/Oneflow-Inc/oneflow/releases/tag/v0.7.0).
...@@ -6,9 +6,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE BOOL "") ...@@ -6,9 +6,11 @@ set(CMAKE_EXPORT_COMPILE_COMMANDS ON CACHE BOOL "")
option(THIRD_PARTY "Build third party" ON) option(THIRD_PARTY "Build third party" ON)
option(ONEFLOW "Build oneflow" ON) option(ONEFLOW "Build oneflow" ON)
if(NOT THIRD_PARTY AND NOT ONEFLOW) if(NOT THIRD_PARTY AND NOT ONEFLOW)
message(FATAL_ERROR "at least one of flags THIRD_PARTY and ONEFLOW should be ON") message(FATAL_ERROR "at least one of flags THIRD_PARTY and ONEFLOW should be ON")
endif() endif()
option(USE_CLANG_FORMAT "" OFF) option(USE_CLANG_FORMAT "" OFF)
option(USE_CLANG_TIDY "" OFF) option(USE_CLANG_TIDY "" OFF)
option(BUILD_PYTHON "" ON) option(BUILD_PYTHON "" ON)
...@@ -16,6 +18,7 @@ option(BUILD_CPP_API "Option to build OneFlow C++ API (beta)" OFF) ...@@ -16,6 +18,7 @@ option(BUILD_CPP_API "Option to build OneFlow C++ API (beta)" OFF)
option(BUILD_RDMA "" OFF) option(BUILD_RDMA "" OFF)
option(BUILD_CUDA "" ON) option(BUILD_CUDA "" ON)
option(BUILD_ROCM "" OFF) option(BUILD_ROCM "" OFF)
option(BUILD_ROCM_GRAPHS "" OFF)
option(BUILD_TESTING "" OFF) option(BUILD_TESTING "" OFF)
option(BUILD_GIT_VERSION "" ON) option(BUILD_GIT_VERSION "" ON)
option(BUILD_PROFILER "" OFF) option(BUILD_PROFILER "" OFF)
...@@ -29,6 +32,7 @@ option(OF_SOFTMAX_USE_FAST_MATH "" ON) ...@@ -29,6 +32,7 @@ option(OF_SOFTMAX_USE_FAST_MATH "" ON)
option(OF_LAYER_NORM_USE_FAST_MATH "" ON) option(OF_LAYER_NORM_USE_FAST_MATH "" ON)
option(TREAT_WARNINGS_AS_ERRORS "" ON) option(TREAT_WARNINGS_AS_ERRORS "" ON)
option(MAYBE_NEED_ERROR_MSG_CHECK "" OFF) option(MAYBE_NEED_ERROR_MSG_CHECK "" OFF)
# Reference: # Reference:
# https://medium.com/@alasher/colored-c-compiler-output-with-ninja-clang-gcc-10bfe7f2b949 # https://medium.com/@alasher/colored-c-compiler-output-with-ninja-clang-gcc-10bfe7f2b949
option(OF_FORCE_COLORED_DIAGNOSTICS "Always produce ANSI-colored diagnostics (GNU/Clang only)." ON) option(OF_FORCE_COLORED_DIAGNOSTICS "Always produce ANSI-colored diagnostics (GNU/Clang only)." ON)
...@@ -37,18 +41,14 @@ if (BUILD_CUDA AND BUILD_ROCM) ...@@ -37,18 +41,14 @@ if (BUILD_CUDA AND BUILD_ROCM)
message(FATAL_ERROR "Compile with cuda and rocm enabled simultaneously are not supported right now.") message(FATAL_ERROR "Compile with cuda and rocm enabled simultaneously are not supported right now.")
endif() endif()
if (BUILD_ROCM) set(ONEFLOW_CURRENT_VERSION 0.8.1.dev CACHE STRING "")
# Search for rocm in common locations
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH}/hip ${ROCM_PATH} ${ROCM_PATH}/llvm)
endif()
set(ONEFLOW_CURRENT_VERSION 0.7.0.dev CACHE STRING "")
if(BUILD_FOR_CI) if(BUILD_FOR_CI)
set(ONEFLOW_CURRENT_VERSION ci) set(ONEFLOW_CURRENT_VERSION ci)
endif() endif()
set(LLVM_PROVIDER "in-tree" CACHE STRING "in-tree, install") set(LLVM_PROVIDER "in-tree" CACHE STRING "in-tree, install")
if(NOT WITH_MLIR) if(NOT WITH_MLIR)
set(LLVM_PROVIDER "install" set(LLVM_PROVIDER "install"
CACHE STRING "in-tree will build LLVM's ALL, not what we want when not building MLIR" FORCE) CACHE STRING "in-tree will build LLVM's ALL, not what we want when not building MLIR" FORCE)
...@@ -70,6 +70,14 @@ set(CUDNN_STATIC OFF CACHE BOOL "") ...@@ -70,6 +70,14 @@ set(CUDNN_STATIC OFF CACHE BOOL "")
project(oneflow C CXX) project(oneflow C CXX)
if (BUILD_ROCM)
# Search for rocm in common locations
#FIND_PACKAGE(HIP REQUIRED)
list(APPEND CMAKE_PREFIX_PATH ${ROCM_PATH} ${ROCM_PATH}/hip ${ROCM_PATH}/llvm)
set(CMAKE_MODULE_PATH "${HIP_PATH}/cmake" ${CMAKE_MODULE_PATH})
set(CMAKE_MODULE_PATH "${HIP_PATH}/lib/cmake/hip" ${CMAKE_MODULE_PATH})
endif()
if(NOT CMAKE_BUILD_TYPE) if(NOT CMAKE_BUILD_TYPE)
message(STATUS "No build type selected, default to Release") message(STATUS "No build type selected, default to Release")
set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type (default Release)" FORCE) set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Build type (default Release)" FORCE)
...@@ -81,13 +89,16 @@ if(NOT CMAKE_BUILD_TYPE MATCHES "^(Debug|Release|RelWithDebInfo|MinSizeRel)$") ...@@ -81,13 +89,16 @@ if(NOT CMAKE_BUILD_TYPE MATCHES "^(Debug|Release|RelWithDebInfo|MinSizeRel)$")
"Expected CMAKE_BUILD_TYPE is Debug, Release, RelWithDebInfo or MinSizeRel, got ${CMAKE_BUILD_TYPE}" "Expected CMAKE_BUILD_TYPE is Debug, Release, RelWithDebInfo or MinSizeRel, got ${CMAKE_BUILD_TYPE}"
) )
endif() endif()
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}") message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")
set(COMPILER_VERSION_ERROR_MSG "At least gcc 7, clang 5 or Apple clang 12 is supported.") set(COMPILER_VERSION_ERROR_MSG "At least gcc 7, clang 5 or Apple clang 12 is supported.")
if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU") if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
if("${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 7) if("${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 7)
message(FATAL_ERROR ${COMPILER_VERSION_ERROR_MSG}) message(FATAL_ERROR ${COMPILER_VERSION_ERROR_MSG})
endif() endif()
if(CPU_THREADING_RUNTIME STREQUAL "OMP") if(CPU_THREADING_RUNTIME STREQUAL "OMP")
set(OMP_FLAGS "-fopenmp") set(OMP_FLAGS "-fopenmp")
endif() endif()
...@@ -102,6 +113,7 @@ elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang") ...@@ -102,6 +113,7 @@ elseif("${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang")
) )
endif() endif()
endif() endif()
if("${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 5) if("${CMAKE_CXX_COMPILER_VERSION}" VERSION_LESS 5)
message(FATAL_ERROR ${COMPILER_VERSION_ERROR_MSG}) message(FATAL_ERROR ${COMPILER_VERSION_ERROR_MSG})
endif() endif()
...@@ -117,6 +129,7 @@ set(oneflow_cmake_dir ${PROJECT_SOURCE_DIR}/cmake) ...@@ -117,6 +129,7 @@ set(oneflow_cmake_dir ${PROJECT_SOURCE_DIR}/cmake)
get_filename_component(real_src_dir "${CMAKE_SOURCE_DIR}" REALPATH) get_filename_component(real_src_dir "${CMAKE_SOURCE_DIR}" REALPATH)
get_filename_component(real_bin_dir "${CMAKE_BINARY_DIR}" REALPATH) get_filename_component(real_bin_dir "${CMAKE_BINARY_DIR}" REALPATH)
if("${real_src_dir}" STREQUAL "${real_bin_dir}") if("${real_src_dir}" STREQUAL "${real_bin_dir}")
message(FATAL_ERROR "In-source build not allowed") message(FATAL_ERROR "In-source build not allowed")
endif() endif()
...@@ -132,32 +145,40 @@ if(NOT DEFINED USE_CXX11_ABI) ...@@ -132,32 +145,40 @@ if(NOT DEFINED USE_CXX11_ABI)
set(USE_CXX11_ABI ${CXX11_ABI_AVAILABLE}) set(USE_CXX11_ABI ${CXX11_ABI_AVAILABLE})
elseif(USE_CXX11_ABI) elseif(USE_CXX11_ABI)
check_cxx11_abi(CXX11_ABI_AVAILABLE) check_cxx11_abi(CXX11_ABI_AVAILABLE)
if(NOT CXX11_ABI_AVAILABLE) if(NOT CXX11_ABI_AVAILABLE)
message(FATAL_ERROR "cxx11 abi is not available for current compiler") message(FATAL_ERROR "cxx11 abi is not available for current compiler")
endif() endif()
endif() endif()
message(STATUS "USE_CXX11_ABI: ${USE_CXX11_ABI}") message(STATUS "USE_CXX11_ABI: ${USE_CXX11_ABI}")
if(WITH_MLIR) if(WITH_MLIR)
add_definitions(-DWITH_MLIR) add_definitions(-DWITH_MLIR)
if(WITH_MLIR_CUDA_CODEGEN) if(WITH_MLIR_CUDA_CODEGEN)
add_definitions(-DWITH_MLIR_CUDA_CODEGEN) add_definitions(-DWITH_MLIR_CUDA_CODEGEN)
endif() endif()
endif() endif()
if(WITH_COCOAPI) if(WITH_COCOAPI)
add_definitions(-DWITH_COCOAPI) add_definitions(-DWITH_COCOAPI)
endif() endif()
if(USE_CXX11_ABI) if(USE_CXX11_ABI)
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1) add_definitions(-D_GLIBCXX_USE_CXX11_ABI=1)
else() else()
add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0) add_definitions(-D_GLIBCXX_USE_CXX11_ABI=0)
endif() endif()
if(BUILD_PROFILER) if(BUILD_PROFILER)
add_definitions(-DOF_ENABLE_PROFILER) add_definitions(-DOF_ENABLE_PROFILER)
endif() endif()
if(OF_SOFTMAX_USE_FAST_MATH) if(OF_SOFTMAX_USE_FAST_MATH)
add_definitions(-DOF_SOFTMAX_USE_FAST_MATH) add_definitions(-DOF_SOFTMAX_USE_FAST_MATH)
endif() endif()
if(OF_LAYER_NORM_USE_FAST_MATH) if(OF_LAYER_NORM_USE_FAST_MATH)
add_definitions(-DOF_LAYER_NORM_USE_FAST_MATH) add_definitions(-DOF_LAYER_NORM_USE_FAST_MATH)
endif() endif()
...@@ -178,14 +199,17 @@ if(OF_FORCE_COLORED_DIAGNOSTICS) ...@@ -178,14 +199,17 @@ if(OF_FORCE_COLORED_DIAGNOSTICS)
$<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:Clang>:-fcolor-diagnostics>> $<$<COMPILE_LANGUAGE:CXX>:$<$<CXX_COMPILER_ID:Clang>:-fcolor-diagnostics>>
$<$<COMPILE_LANGUAGE:CUDA>:$<$<CUDA_COMPILER_ID:Clang>:-fcolor-diagnostics>>) $<$<COMPILE_LANGUAGE:CUDA>:$<$<CUDA_COMPILER_ID:Clang>:-fcolor-diagnostics>>)
endif() endif()
if(RPC_BACKEND MATCHES "GRPC") if(RPC_BACKEND MATCHES "GRPC")
add_definitions(-DRPC_BACKEND_GRPC) add_definitions(-DRPC_BACKEND_GRPC)
message(STATUS "RPC backend enabled: gRPC") message(STATUS "RPC backend enabled: gRPC")
set(SUPPORTED_RPC_BACKEND_FOUND 1) set(SUPPORTED_RPC_BACKEND_FOUND 1)
endif() endif()
if(WITH_ONEDNN) if(WITH_ONEDNN)
add_definitions(-DWITH_ONEDNN) add_definitions(-DWITH_ONEDNN)
endif() endif()
add_definitions(-DRPC_BACKEND_LOCAL) add_definitions(-DRPC_BACKEND_LOCAL)
message(STATUS "RPC backend enabled: local") message(STATUS "RPC backend enabled: local")
enable_testing() enable_testing()
...@@ -236,19 +260,37 @@ if(WIN32) ...@@ -236,19 +260,37 @@ if(WIN32)
endif() endif()
endforeach() endforeach()
#set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS} /DEBUG:FASTLINK") # set(CMAKE_EXE_LINKER_FLAGS_DEBUG "${CMAKE_EXE_LINKER_FLAGS} /DEBUG:FASTLINK")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /D_ITERATOR_DEBUG_LEVEL=0") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} /D_ITERATOR_DEBUG_LEVEL=0")
else() else()
set(EXTRA_CXX_FLAGS "-std=c++14 -Wall -Wno-sign-compare -Wno-unused-function -fPIC") set(EXTRA_CXX_FLAGS "-std=c++14 -Wall -Wno-sign-compare -Wno-unused-function -fPIC")
if(APPLE) if(APPLE)
set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-deprecated-declarations") set(EXTRA_CXX_FLAGS "${EXTRA_CXX_FLAGS} -Wno-deprecated-declarations")
endif() endif()
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${EXTRA_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${EXTRA_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} ${EXTRA_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${EXTRA_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} ${EXTRA_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${EXTRA_CXX_FLAGS}") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "${CMAKE_CXX_FLAGS_RELWITHDEBINFO} ${EXTRA_CXX_FLAGS}")
endif() endif()
if((ENABLE_ASAN OR ENABLE_UBSAN) AND ENABLE_TSAN)
message(FATAL_ERROR "Only ASAN and UBSAN can be enabled at the same time.")
endif()
if(ENABLE_ASAN)
add_compile_options(-fsanitize=address -fno-omit-frame-pointer)
add_link_options(-fsanitize=address -fno-omit-frame-pointer)
endif()
if(ENABLE_UBSAN)
add_compile_options(-fsanitize=undefined)
add_link_options(-fsanitize=undefined)
endif()
if(ENABLE_TSAN)
add_compile_options(-fsanitize=thread)
add_link_options(-fsanitize=thread)
endif()
if(BUILD_PYTHON) if(BUILD_PYTHON)
set(ONEFLOW_INCLUDE_DIR "${ONEFLOW_PYTHON_DIR}/oneflow/include") set(ONEFLOW_INCLUDE_DIR "${ONEFLOW_PYTHON_DIR}/oneflow/include")
endif(BUILD_PYTHON) endif(BUILD_PYTHON)
...@@ -274,38 +316,55 @@ set(KINETO_URL ...@@ -274,38 +316,55 @@ set(KINETO_URL
use_mirror(VARIABLE KINETO_URL URL ${KINETO_URL}) use_mirror(VARIABLE KINETO_URL URL ${KINETO_URL})
set(KINETO_MD5 f9b550591b3899fb267270c19484933f) set(KINETO_MD5 f9b550591b3899fb267270c19484933f)
set(CUTLASS_URL
https://github.com/Oneflow-Inc/cutlass/archive/34b3d940ad5e0e20776d0372b4e70092e67c268c.zip)
use_mirror(VARIABLE CUTLASS_URL URL ${CUTLASS_URL})
set(CUTLASS_MD5 b75e600310e576811d89d37578d66fdb)
include(cuda) include(cuda)
add_subdirectory(external)
include(third_party)
if(BUILD_CUDA) if(BUILD_CUDA)
# NOTE: if you want to use source PTX with a version different from produced PTX/binary, you should add flags # NOTE: if you want to use source PTX with a version different from produced PTX/binary, you should add flags
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES) if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
list(APPEND CMAKE_CUDA_ARCHITECTURES 60-real) list(APPEND CMAKE_CUDA_ARCHITECTURES 60-real)
# Tesla P40/P4, Quadro Pxxx/Pxxxx, GeForce GTX 10xx, TITAN X/Xp # Tesla P40/P4, Quadro Pxxx/Pxxxx, GeForce GTX 10xx, TITAN X/Xp
list(APPEND CMAKE_CUDA_ARCHITECTURES 61-real) list(APPEND CMAKE_CUDA_ARCHITECTURES 61-real)
# V100, TITAN V # V100, TITAN V
list(APPEND CMAKE_CUDA_ARCHITECTURES 70-real) list(APPEND CMAKE_CUDA_ARCHITECTURES 70-real)
if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0") if(CUDA_VERSION VERSION_GREATER_EQUAL "10.0")
# T4, Quadro RTX xxxx, Txxxx, Geforce RTX 20xx, TITAN RTX # T4, Quadro RTX xxxx, Txxxx, Geforce RTX 20xx, TITAN RTX
list(APPEND CMAKE_CUDA_ARCHITECTURES 75-real) list(APPEND CMAKE_CUDA_ARCHITECTURES 75-real)
endif() endif()
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0") if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
# A100 # A100
list(APPEND CMAKE_CUDA_ARCHITECTURES 80-real) list(APPEND CMAKE_CUDA_ARCHITECTURES 80-real)
endif() endif()
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1") if(CUDA_VERSION VERSION_GREATER_EQUAL "11.1")
# GeForce RTX 30xx # GeForce RTX 30xx
list(APPEND CMAKE_CUDA_ARCHITECTURES 86-real) list(APPEND CMAKE_CUDA_ARCHITECTURES 86-real)
endif() endif()
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.0")
list(APPEND CMAKE_CUDA_ARCHITECTURES 80-virtual) if(CUDA_VERSION VERSION_GREATER_EQUAL "11.8")
elseif(CUDA_VERSION VERSION_GREATER_EQUAL "10.0") # GeForce RTX 40xx
list(APPEND CMAKE_CUDA_ARCHITECTURES 75-virtual) list(APPEND CMAKE_CUDA_ARCHITECTURES 89-real)
else() # NVIDIA H100
list(APPEND CMAKE_CUDA_ARCHITECTURES 70-virtual) list(APPEND CMAKE_CUDA_ARCHITECTURES 90-real)
endif() endif()
endif() endif()
foreach(CUDA_ARCH ${CMAKE_CUDA_ARCHITECTURES})
if(CUDA_ARCH MATCHES "^([0-9]+)\\-real$")
list(APPEND CUDA_REAL_ARCHS_LIST ${CMAKE_MATCH_1})
elseif(CUDA_ARCH MATCHES "^([0-9]+)$")
list(APPEND CUDA_REAL_ARCHS_LIST ${CMAKE_MATCH_1})
endif()
endforeach()
enable_language(CUDA) enable_language(CUDA)
include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) include_directories(${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}") message(STATUS "CMAKE_CUDA_ARCHITECTURES: ${CMAKE_CUDA_ARCHITECTURES}")
...@@ -316,16 +375,22 @@ if(BUILD_CUDA) ...@@ -316,16 +375,22 @@ if(BUILD_CUDA)
set(CUDA_NVCC_THREADS_NUMBER "4" CACHE STRING "") set(CUDA_NVCC_THREADS_NUMBER "4" CACHE STRING "")
list(APPEND CUDA_NVCC_FLAGS -t ${CUDA_NVCC_THREADS_NUMBER}) list(APPEND CUDA_NVCC_FLAGS -t ${CUDA_NVCC_THREADS_NUMBER})
endif() endif()
list(APPEND CUDA_NVCC_FLAGS "-Xcompiler=-fno-strict-aliasing")
message(STATUS "CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS}) message(STATUS "CUDA_NVCC_FLAGS: " ${CUDA_NVCC_FLAGS})
list(JOIN CUDA_NVCC_FLAGS " " CMAKE_CUDA_FLAGS) list(JOIN CUDA_NVCC_FLAGS " " CMAKE_CUDA_FLAGS)
endif() endif()
endif() endif()
add_subdirectory(external)
include(third_party)
message(STATUS "CMAKE_CXX_COMPILER_VERSION: " ${CMAKE_CXX_COMPILER_VERSION}) message(STATUS "CMAKE_CXX_COMPILER_VERSION: " ${CMAKE_CXX_COMPILER_VERSION})
add_custom_target(oneflow_deps ALL DEPENDS prepare_oneflow_third_party) add_custom_target(oneflow_deps ALL DEPENDS prepare_oneflow_third_party)
# skip oneflow cmake to avoid errors caused by the absences of python-dev, proto src # skip oneflow cmake to avoid errors caused by the absences of python-dev, proto src
if(ONEFLOW) if(ONEFLOW)
include(oneflow) include(oneflow)
endif() endif()
add_subdirectory(ci) add_subdirectory(ci)
# OneFlow # OneFlow
OneFlow is a deep learning framework designed to be **user-friendly, scalable and efficient**. With OneFlow, it is easy to: OneFlow is a deep learning framework designed to be **user-friendly, scalable and efficient**. With OneFlow, it is easy to:
- program a model with **PyTorch-like API** - program a model with **PyTorch-like API**
- scale a model to n-dimensional-parallel/distributed execution with the **Global View API** - scale a model to n-dimensional-parallel/distributed execution with the **Global View API**
- accelerate/deploy a model with the **Static Graph Compiler**. - accelerate/deploy a model with the **Static Graph Compiler**.
...@@ -12,8 +13,8 @@ OneFlow is a deep learning framework designed to be **user-friendly, scalable an ...@@ -12,8 +13,8 @@ OneFlow is a deep learning framework designed to be **user-friendly, scalable an
## Latest News ## Latest News
- Version 0.8.0 is out! - Version 0.9.0 is out!
- [Full changelog](https://github.com/Oneflow-Inc/oneflow/releases/tag/v0.8.0) - [Full changelog](https://github.com/Oneflow-Inc/oneflow/releases/tag/v0.9.0)
## Publication ## Publication
...@@ -60,14 +61,14 @@ OneFlow is a deep learning framework designed to be **user-friendly, scalable an ...@@ -60,14 +61,14 @@ OneFlow is a deep learning framework designed to be **user-friendly, scalable an
- To install nightly release of OneFlow with CUDA support: - To install nightly release of OneFlow with CUDA support:
```bash ```bash
python3 -m pip install --pre oneflow -f https://staging.oneflow.info/branch/master/cu102 python3 -m pip install --pre oneflow -f https://staging.oneflow.info/branch/master/cu117
``` ```
- To install other available builds for different variants: - To install other available builds for different variants:
- Stable - Stable
```bash ```bash
python3 -m pip install --find-links https://release.oneflow.info oneflow==0.8.0+[PLATFORM] python3 -m pip install --find-links https://release.oneflow.info oneflow==0.9.0+cu117
``` ```
- Nightly - Nightly
``` ```
...@@ -76,7 +77,7 @@ OneFlow is a deep learning framework designed to be **user-friendly, scalable an ...@@ -76,7 +77,7 @@ OneFlow is a deep learning framework designed to be **user-friendly, scalable an
- All available `[PLATFORM]`: - All available `[PLATFORM]`:
| Platform |CUDA Driver Version| Supported GPUs | | Platform |CUDA Driver Version| Supported GPUs |
|---|---|---| |---|---|---|
| cu112 | >= 450.80.02 | GTX 10xx, RTX 20xx, A100, RTX 30xx | | cu117 | >= 450.80.02 | GTX 10xx, RTX 20xx, A100, RTX 30xx |
| cu102 | >= 440.33 | GTX 10xx, RTX 20xx | | cu102 | >= 440.33 | GTX 10xx, RTX 20xx |
| cpu | N/A | N/A | | cpu | N/A | N/A |
...@@ -89,8 +90,7 @@ OneFlow is a deep learning framework designed to be **user-friendly, scalable an ...@@ -89,8 +90,7 @@ OneFlow is a deep learning framework designed to be **user-friendly, scalable an
### Use docker image ### Use docker image
``` ```
docker pull oneflowinc/oneflow:nightly-cuda10.2 docker pull oneflowinc/oneflow:nightly-cuda11.7
docker pull oneflowinc/oneflow:nightly-cuda11.2
``` ```
### Build from Source ### Build from Source
......
# Monkey-patch auditwheel's platform policies so ROCm runtime libraries are
# treated as system-provided and are NOT bundled into the pypi wheel.
# (This hook was historically used to exclude libjvm.so as well.)
import sys
from auditwheel.main import main
from auditwheel.policy import _POLICIES as POLICIES

# These libraries are loaded dynamically from the host ROCm installation;
# whitelisting them keeps the wheel slim. NOTE: the original list contained
# "librocsolver.so.0 " with a trailing space, which could never match the
# actual soname — fixed here.
_ROCM_WHITELIST = [
    'librccl.so.1',
    'libhipblas.so.0',
    'libhiprand.so.1',
    'librocrand.so.1',
    'libMIOpen.so.1',
    'libgalaxyhip.so.4',
    'librocm_smi64.so.2',
    'librocsolver.so.0',
    'librocblas.so.0',
]

for p in POLICIES:
    p['lib_whitelist'].extend(_ROCM_WHITELIST)

if __name__ == "__main__":
    sys.exit(main())
# Monkey-patch auditwheel's platform policies so ROCm runtime libraries are
# treated as system-provided and are NOT bundled into the pypi wheel.
# (This hook was historically used to exclude libjvm.so as well.)
# This variant whitelists libgalaxyhip.so.5 (newer HIP runtime soname).
import sys
from auditwheel.main import main
from auditwheel.policy import _POLICIES as POLICIES

# These libraries are loaded dynamically from the host ROCm installation;
# whitelisting them keeps the wheel slim. NOTE: the original list contained
# "librocsolver.so.0 " with a trailing space, which could never match the
# actual soname — fixed here.
_ROCM_WHITELIST = [
    'librccl.so.1',
    'libhipblas.so.0',
    'libhiprand.so.1',
    'librocrand.so.1',
    'libMIOpen.so.1',
    'libgalaxyhip.so.5',
    'librocm_smi64.so.2',
    'librocsolver.so.0',
    'librocblas.so.0',
]

for p in POLICIES:
    p['lib_whitelist'].extend(_ROCM_WHITELIST)

if __name__ == "__main__":
    sys.exit(main())
...@@ -130,6 +130,9 @@ if __name__ == "__main__": ...@@ -130,6 +130,9 @@ if __name__ == "__main__":
files = filter( files = filter(
lambda p: p.suffix in exts, pathlib.Path(args.source_dir).rglob("*"), lambda p: p.suffix in exts, pathlib.Path(args.source_dir).rglob("*"),
) )
files = filter(
lambda p: not "oneflow/user/kernels/fmha_flash_attention/" in str(p), files,
)
loop = asyncio.get_event_loop() loop = asyncio.get_event_loop()
files = [str(f) for f in files] files = [str(f) for f in files]
clang_fmt_args = "-dry-run --Werror" clang_fmt_args = "-dry-run --Werror"
......
...@@ -59,9 +59,9 @@ async def run_command(cmd=None, dry=False, name=None): ...@@ -59,9 +59,9 @@ async def run_command(cmd=None, dry=False, name=None):
def download(build_dir, dry=False) -> Optional[List[str]]: def download(build_dir, dry=False) -> Optional[List[str]]:
urls = [ urls = [
"https://github.com/Oneflow-Inc/llvm-project/releases/download/llvmorg-13.0.0-maybe/clang-tidy-13.AppImage" "https://github.com/Oneflow-Inc/llvm-project/releases/download/maybe-14.0.4/clang-tidy-14.AppImage"
if os.getenv("CI") if os.getenv("CI")
else "https://oneflow-static.oss-cn-beijing.aliyuncs.com/bin/clang-tidy/linux-x86_64/clang-tidy-13.AppImage", else "https://oneflow-static.oss-cn-beijing.aliyuncs.com/bin/clang-tidy/linux-x86_64/clang-tidy-14.AppImage",
"https://raw.githubusercontent.com/oneflow-inc/llvm-project/maybe/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py", "https://raw.githubusercontent.com/oneflow-inc/llvm-project/maybe/clang-tools-extra/clang-tidy/tool/clang-tidy-diff.py",
] ]
dst_dir = f"{build_dir}/cache/bin" dst_dir = f"{build_dir}/cache/bin"
......
set -ex set -ex
export PATH=/usr/lib/llvm-12/bin:/usr/lib/llvm-13/bin:/usr/lib64/ccache:/root/.local/bin:$PATH export PATH=/usr/lib/llvm-15/bin:/usr/lib64/ccache:/root/.local/bin:$PATH
# clean python dir # clean python dir
cd ${ONEFLOW_CI_SRC_DIR} cd ${ONEFLOW_CI_SRC_DIR}
${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r ci/fixed-dev-requirements.txt ${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r ci/fixed-dev-requirements.txt
cd python cd python
git config --global --add safe.directory ${ONEFLOW_CI_SRC_DIR}
git clean -nXd -e \!dist -e \!dist/** git clean -nXd -e \!dist -e \!dist/**
git clean -fXd -e \!dist -e \!dist/** git clean -fXd -e \!dist -e \!dist/**
......
...@@ -6,9 +6,11 @@ ld --version ...@@ -6,9 +6,11 @@ ld --version
# clean python dir # clean python dir
cd ${ONEFLOW_CI_SRC_DIR} cd ${ONEFLOW_CI_SRC_DIR}
${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r ci/fixed-dev-requirements.txt ${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r ci/fixed-dev-requirements.txt
${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user auditwheel
cd python cd python
function clean_artifacts { function clean_artifacts {
git config --global --add safe.directory ${ONEFLOW_CI_SRC_DIR}
git clean -nXd -e \!dist -e \!dist/** git clean -nXd -e \!dist -e \!dist/**
git clean -fXd -e \!dist -e \!dist/** git clean -fXd -e \!dist -e \!dist/**
} }
......
...@@ -5,9 +5,11 @@ ld --version ...@@ -5,9 +5,11 @@ ld --version
# clean python dir # clean python dir
cd ${ONEFLOW_CI_SRC_DIR} cd ${ONEFLOW_CI_SRC_DIR}
${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r ci/fixed-dev-requirements.txt ${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user -r ci/fixed-dev-requirements.txt
${ONEFLOW_CI_PYTHON_EXE} -m pip install -i https://mirrors.aliyun.com/pypi/simple --user auditwheel
cd python cd python
function clean_artifacts { function clean_artifacts {
git config --global --add safe.directory ${ONEFLOW_CI_SRC_DIR}
git clean -nXd -e \!dist -e \!dist/** git clean -nXd -e \!dist -e \!dist/**
git clean -fXd -e \!dist -e \!dist/** git clean -fXd -e \!dist -e \!dist/**
} }
......
#!/bin/bash
# Run the single-client serving unit tests in an isolated scratch directory,
# so test artifacts never pollute the source tree.
set -xe
# Both locations are overridable via environment variables.
src_dir=${ONEFLOW_SRC_DIR:-"$PWD"}
test_tmp_dir=${ONEFLOW_TEST_TMP_DIR:-"./test_tmp_dir"}
# Recreate the scratch dir from scratch so stale files cannot affect results.
rm -rf $test_tmp_dir
mkdir -p $test_tmp_dir
cp -r $src_dir/python/oneflow/compatible/single_client/test $test_tmp_dir
cd $test_tmp_dir
# Restrict the tests to a single device.
export ONEFLOW_TEST_DEVICE_NUM=1
python3 -m unittest discover test/serving --failfast --verbose
#!/bin/bash
# Dry-run the single-client BERT pretraining benchmark against a mocked
# 2-node cluster (no real training: ONEFLOW_DRY_RUN=1).
set -xe
export PYTHONUNBUFFERED=1
# Source tree and scratch dir are overridable via environment variables.
src_dir=${ONEFLOW_SRC_DIR:-"$PWD"}
test_tmp_dir=${ONEFLOW_TEST_TMP_DIR:-"./test_tmp_dir"}
# Start from a clean scratch copy of the benchmarks.
rm -rf $test_tmp_dir
mkdir -p $test_tmp_dir
cp -r $src_dir/python/oneflow/compatible/single_client/benchmarks $test_tmp_dir
cd $test_tmp_dir/benchmarks
export ONEFLOW_DRY_RUN=1
# Turning on ONEFLOW_DEBUG_MODE causes a protobuf error, so it stays disabled.
# export ONEFLOW_DEBUG_MODE=1
# Build a fake node list like "mockhost01,mockhost02" for the dry run.
node_num=2
generated_node_list=$(seq -f "mockhost%02g" -s, $node_num)
# Optional profilers, kept for reference:
# heaptrack
# valgrind --tool=massif --threshold=0.0001
# /usr/bin/time -v
time python3 bert_benchmark/run_pretraining.py \
--learning_rate=1e-4 \
--weight_decay_rate=0.01 \
--batch_size_per_device=24 \
--iter_num=5 \
--loss_print_every_n_iter=1 \
--data_dir="/dataset/bert/bert_seq_len_128_repeat1024" \
--data_part_num=1 \
--seq_length=128 \
--max_predictions_per_seq=20 \
--num_hidden_layers=12 \
--num_attention_heads=12 \
--max_position_embeddings=512 \
--type_vocab_size=2 \
--vocab_size=30522 \
--attention_probs_dropout_prob=0.1 \
--hidden_dropout_prob=0.1 \
--hidden_size_per_head=64 \
--node_list=${generated_node_list} \
--node_num=${node_num} \
--gpu_num_per_node=8
#!/bin/bash
# Run the mock-torch scope unittest, then check that `import *` from the
# mocked torch modules resolves to oneflow symbols while the mock is enabled
# and stops resolving to them after mock.disable().
set -e
MOCK_UNITTEST=$PWD/python/oneflow/test/misc/test_mock_scope.py
python3 $MOCK_UNITTEST --failfast --verbose
# testing import *
python3 -c "
import oneflow
import oneflow.nn
import oneflow.mock_torch as mock; mock.enable();
from torch.sbp import *; assert(sbp == oneflow.sbp.sbp);
from torch import *; assert(randn == oneflow.randn);
from torch.nn import *; assert(Graph == oneflow.nn.Graph);
mock.disable();
from torch import *; assert(randn != oneflow.randn);
from torch.nn import *; assert(Graph != oneflow.nn.Graph);
"
#!/bin/bash
# Smoke-test oneflow.mock_torch: toggle the torch mock on/off through both the
# python module entry point and the oneflow-mock-torch console script, checking
# the probe script's reported state after each toggle.
set -e
MOCK_TORCH=$PWD/python/oneflow/test/misc/test_mock_simple.py
# Exit nonzero unless the probe script's output contains the substring in $1.
same_or_exit() {
if [[ "$(python3 $MOCK_TORCH)" != *"$1"* ]]; then
exit 1
fi
}
eval $(python3 -m oneflow.mock_torch) # test call to python module, default argument is enable
same_or_exit "True"
# testing import
python3 -c 'import torch; torch.randn(2,3)'
python3 -c 'import torch.nn; torch.nn.Graph'
python3 -c 'import torch.version; torch.version.__version__'
python3 -c 'from torch import *; randn(2,3)'
python3 -c 'from torch.nn import *; Graph'
python3 -c 'from torch.sbp import *; sbp'
python3 -c 'from torch import nn; nn.Graph'
python3 -c 'from torch.version import __version__'
# `2>&1` before `>/dev/null` routes stderr into the pipe, so grep sees the
# error text while stdout is discarded.
python3 -c 'import torch; torch.no_exist' 2>&1 >/dev/null | grep -q 'NotImplementedError'
eval $(python3 -m oneflow.mock_torch disable)
same_or_exit "False"
eval $(python3 -m oneflow.mock_torch enable)
same_or_exit "True"
eval $(python3 -m oneflow.mock_torch disable) # recover
same_or_exit "False"
eval $(oneflow-mock-torch) # test scripts
same_or_exit "True"
eval $(oneflow-mock-torch disable)
same_or_exit "False"
eval $(oneflow-mock-torch enable)
same_or_exit "True"
eval $(oneflow-mock-torch disable)
same_or_exit "False"
# CI cache preset: CPU-only RelWithDebInfo build with AddressSanitizer enabled.

# --- Feature toggles ----------------------------------------------------------
set(BUILD_CUDA NO CACHE BOOL "")
set(BUILD_GIT_VERSION YES CACHE BOOL "")
set(BUILD_TESTING YES CACHE BOOL "")
set(BUILD_CPP_API ON CACHE BOOL "")
set(BUILD_FOR_CI ON CACHE BOOL "")
set(BUILD_SHARED_LIBS ON CACHE BOOL "")
set(WITH_ONEDNN YES CACHE BOOL "")
set(WITH_MLIR ON CACHE BOOL "")
set(TREAT_WARNINGS_AS_ERRORS YES CACHE BOOL "")

# --- Toolchain / generator ----------------------------------------------------
set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
set(CMAKE_GENERATOR Ninja CACHE STRING "")
set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF CACHE BOOL "")
set(CPU_THREADING_RUNTIME "SEQ" CACHE STRING "")

# --- Download mirrors ---------------------------------------------------------
set(THIRD_PARTY_MIRROR aliyun CACHE STRING "")
set(PIP_INDEX_MIRROR "https://pypi.tuna.tsinghua.edu.cn/simple" CACHE STRING "")

# --- Sanitizers -----------------------------------------------------------------
set(ENABLE_ASAN ON CACHE BOOL "")
set(ENABLE_UBSAN OFF CACHE BOOL "")
# CI cache preset: CPU-only RelWithDebInfo build with ThreadSanitizer enabled.
set(BUILD_CUDA NO CACHE BOOL "")
set(BUILD_GIT_VERSION YES CACHE BOOL "")
set(BUILD_TESTING YES CACHE BOOL "")
set(WITH_ONEDNN YES CACHE BOOL "")
set(TREAT_WARNINGS_AS_ERRORS YES CACHE BOOL "")
# Download mirrors for third-party archives and pip packages.
set(THIRD_PARTY_MIRROR aliyun CACHE STRING "")
set(PIP_INDEX_MIRROR "https://pypi.tuna.tsinghua.edu.cn/simple" CACHE STRING "")
set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
set(CMAKE_GENERATOR Ninja CACHE STRING "")
set(BUILD_CPP_API ON CACHE BOOL "")
set(WITH_MLIR ON CACHE BOOL "")
set(BUILD_FOR_CI ON CACHE BOOL "")
set(BUILD_SHARED_LIBS ON CACHE BOOL "")
# Cache object files via ccache to speed up repeated CI builds.
set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CPU_THREADING_RUNTIME "SEQ" CACHE STRING "")
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF CACHE BOOL "")
# Enables -fsanitize=thread (see the ENABLE_TSAN branch in the top-level CMakeLists).
set(ENABLE_TSAN ON CACHE BOOL "")
...@@ -4,6 +4,7 @@ set(CMAKE_CUDA_COMPILER "clang++" CACHE STRING "") ...@@ -4,6 +4,7 @@ set(CMAKE_CUDA_COMPILER "clang++" CACHE STRING "")
set(CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "") set(CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
set(CMAKE_MODULE_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "") set(CMAKE_MODULE_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "") set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
set(WITH_MLIR YES CACHE BOOL "")
set(BUILD_SHARED_LIBS YES CACHE BOOL "") set(BUILD_SHARED_LIBS YES CACHE BOOL "")
set(BUILD_CUDA YES CACHE BOOL "") set(BUILD_CUDA YES CACHE BOOL "")
set(CMAKE_CUDA_ARCHITECTURES "75;52-real" CACHE STRING "") set(CMAKE_CUDA_ARCHITECTURES "75;52-real" CACHE STRING "")
......
# CMake cache preset: shared-libs CUDA build targeting compute capability 8.0
# (A100), with MLIR and MLIR CUDA codegen enabled, linked with lld.
set(BUILD_SHARED_LIBS YES CACHE BOOL "")
# uncomment only if you know what you are doing
# set(CMAKE_LINK_DEPENDS_NO_SHARED YES CACHE BOOL "")
set(BUILD_CUDA YES CACHE BOOL "")
set(BUILD_GIT_VERSION NO CACHE BOOL "")
set(TREAT_WARNINGS_AS_ERRORS YES CACHE BOOL "")
set(BUILD_HWLOC NO CACHE BOOL "")
set(BUILD_TESTING OFF CACHE BOOL "")
set(WITH_MLIR YES CACHE BOOL "")
set(WITH_MLIR_CUDA_CODEGEN YES CACHE BOOL "")
# Download mirrors for third-party archives and pip packages.
set(THIRD_PARTY_MIRROR aliyun CACHE STRING "")
set(PIP_INDEX_MIRROR "https://pypi.tuna.tsinghua.edu.cn/simple" CACHE STRING "")
set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
set(CMAKE_GENERATOR Ninja CACHE STRING "")
set(CMAKE_CUDA_ARCHITECTURES "80" CACHE STRING "")
set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda CACHE STRING "")
set(CUDNN_ROOT_DIR /usr/local/cudnn CACHE STRING "")
# Cache compiler invocations via ccache for faster repeated builds.
set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CUDA_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF CACHE BOOL "")
# Use the lld linker for all binary kinds.
set(CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
set(CMAKE_MODULE_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
set(CPU_THREADING_RUNTIME SEQ CACHE STRING
"when using lld with TBB enabled, there will be linkage error")
# NOTE(review): BUILD_HWLOC was already set to NO above; this second non-FORCE
# cache set is a no-op once the variable is in the cache — consider deduplicating.
set(BUILD_HWLOC OFF CACHE BOOL "")
set(WITH_ONEDNN OFF CACHE BOOL "")
# CMake cache preset: shared-libs CUDA build targeting compute capability 8.6
# (RTX 30xx), with MLIR and MLIR CUDA codegen enabled, linked with lld.
set(BUILD_SHARED_LIBS YES CACHE BOOL "")
# uncomment only if you know what you are doing
# set(CMAKE_LINK_DEPENDS_NO_SHARED YES CACHE BOOL "")
set(BUILD_CUDA YES CACHE BOOL "")
set(BUILD_GIT_VERSION NO CACHE BOOL "")
set(TREAT_WARNINGS_AS_ERRORS YES CACHE BOOL "")
set(BUILD_HWLOC NO CACHE BOOL "")
set(BUILD_TESTING OFF CACHE BOOL "")
set(WITH_MLIR YES CACHE BOOL "")
set(WITH_MLIR_CUDA_CODEGEN YES CACHE BOOL "")
# Download mirrors for third-party archives and pip packages.
set(THIRD_PARTY_MIRROR aliyun CACHE STRING "")
set(PIP_INDEX_MIRROR "https://pypi.tuna.tsinghua.edu.cn/simple" CACHE STRING "")
set(CMAKE_BUILD_TYPE RelWithDebInfo CACHE STRING "")
set(CMAKE_GENERATOR Ninja CACHE STRING "")
set(CMAKE_CUDA_ARCHITECTURES "86" CACHE STRING "")
set(CUDA_TOOLKIT_ROOT_DIR /usr/local/cuda CACHE STRING "")
set(CUDNN_ROOT_DIR /usr/local/cudnn CACHE STRING "")
# Cache compiler invocations via ccache for faster repeated builds.
set(CMAKE_C_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CXX_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_CUDA_COMPILER_LAUNCHER ccache CACHE STRING "")
set(CMAKE_INTERPROCEDURAL_OPTIMIZATION OFF CACHE BOOL "")
# Use the lld linker for all binary kinds.
set(CMAKE_EXE_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
set(CMAKE_MODULE_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
set(CMAKE_SHARED_LINKER_FLAGS_INIT "-fuse-ld=lld" CACHE STRING "")
set(CPU_THREADING_RUNTIME SEQ CACHE STRING
"when using lld with TBB enabled, there will be linkage error")
# NOTE(review): BUILD_HWLOC was already set to NO above; this second non-FORCE
# cache set is a no-op once the variable is in the cache — consider deduplicating.
set(BUILD_HWLOC OFF CACHE BOOL "")
set(WITH_ONEDNN OFF CACHE BOOL "")
option(CUDA_STATIC "" ON)
if(BUILD_CUDA) if(BUILD_CUDA)
if((NOT CUDA_STATIC) OR BUILD_SHARED_LIBS)
set(OF_CUDA_LINK_DYNAMIC_LIBRARY ON)
else()
set(OF_CUDA_LINK_DYNAMIC_LIBRARY OFF)
endif()
if(DEFINED CUDA_TOOLKIT_ROOT_DIR) if(DEFINED CUDA_TOOLKIT_ROOT_DIR)
message(WARNING "CUDA_TOOLKIT_ROOT_DIR is deprecated, use CUDAToolkit_ROOT instead") message(WARNING "CUDA_TOOLKIT_ROOT_DIR is deprecated, use CUDAToolkit_ROOT instead")
set(CUDAToolkit_ROOT ${CUDA_TOOLKIT_ROOT_DIR}) set(CUDAToolkit_ROOT ${CUDA_TOOLKIT_ROOT_DIR})
...@@ -32,9 +25,22 @@ if(BUILD_CUDA) ...@@ -32,9 +25,22 @@ if(BUILD_CUDA)
message(FATAL_ERROR "CUDA_VERSION empty") message(FATAL_ERROR "CUDA_VERSION empty")
endif() endif()
message(STATUS "CUDA_VERSION: ${CUDA_VERSION}") message(STATUS "CUDA_VERSION: ${CUDA_VERSION}")
if(CUDA_VERSION VERSION_GREATER_EQUAL "11.6")
set(CUDA_STATIC OFF CACHE BOOL "")
else()
set(CUDA_STATIC ON CACHE BOOL "")
endif()
if((NOT CUDA_STATIC) OR BUILD_SHARED_LIBS)
set(OF_CUDA_LINK_DYNAMIC_LIBRARY ON)
else()
set(OF_CUDA_LINK_DYNAMIC_LIBRARY OFF)
endif()
if(OF_CUDA_LINK_DYNAMIC_LIBRARY) if(OF_CUDA_LINK_DYNAMIC_LIBRARY)
list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublas) list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublas)
list(APPEND VENDOR_CUDA_LIBRARIES CUDA::curand) list(APPEND VENDOR_CUDA_LIBRARIES CUDA::curand)
list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cufft)
if(CUDA_VERSION VERSION_GREATER_EQUAL "10.1") if(CUDA_VERSION VERSION_GREATER_EQUAL "10.1")
list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublasLt) list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublasLt)
endif() endif()
...@@ -46,6 +52,7 @@ if(BUILD_CUDA) ...@@ -46,6 +52,7 @@ if(BUILD_CUDA)
else() else()
list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublas_static) list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublas_static)
list(APPEND VENDOR_CUDA_LIBRARIES CUDA::curand_static) list(APPEND VENDOR_CUDA_LIBRARIES CUDA::curand_static)
list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cufft_static)
if(CUDA_VERSION VERSION_GREATER_EQUAL "10.1") if(CUDA_VERSION VERSION_GREATER_EQUAL "10.1")
list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublasLt_static) list(APPEND VENDOR_CUDA_LIBRARIES CUDA::cublasLt_static)
endif() endif()
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment