2.5

992bec46 · “yuguo” · 0259837d · 992bec46 · 992bec46 · 0259837d
Commit 992bec46 authored Oct 08, 2023 by “yuguo”
20 changed files
--- a/.clang-tidy
+++ b/.clang-tidy
+---
+Checks: '
+bugprone-argument-comment,
+-bugprone-assert-side-effect,
+-bugprone-bad-signal-to-kill-thread,
+-bugprone-bool-pointer-implicit-conversion,
+-bugprone-branch-clone,
+bugprone-copy-constructor-init,
+-bugprone-dangling-handle,
+-bugprone-dynamic-static-initializers,
+-bugprone-exception-escape,
+-bugprone-fold-init-type,
+-bugprone-forwarding-reference-overload,
+-bugprone-inaccurate-erase,
+-bugprone-incorrect-roundings,
+-bugprone-infinite-loop,
+bugprone-integer-division,
+-bugprone-macro-repeated-side-effects,
+-bugprone-misplaced-operator-in-strlen-in-alloc,
+-bugprone-misplaced-widening-cast,
+-bugprone-move-forwarding-reference,
+-bugprone-multiple-statement-macro,
+-bugprone-narrowing-conversions,
+-bugprone-not-null-terminated-result,
+-bugprone-parent-virtual-call,
+-bugprone-posix-return,
+-bugprone-signed-char-misuse,
+-bugprone-sizeof-container,
+-bugprone-sizeof-expression,
+-bugprone-string-constructor,
+-bugprone-string-integer-assignment,
+-bugprone-string-literal-with-embedded-nul,
+-bugprone-suspicious-enum-usage,
+-bugprone-suspicious-memset-usage,
+-bugprone-suspicious-missing-comma,
+-bugprone-suspicious-semicolon,
+-bugprone-suspicious-string-compare,
+-bugprone-terminating-continue,
+-bugprone-throw-keyword-missing,
+-bugprone-too-small-loop-variable,
+-bugprone-undefined-memory-manipulation,
+-bugprone-undelegated-constructor,
+-bugprone-unhandled-self-assignment,
+bugprone-unused-raii,
+-bugprone-unused-return-value,
+-bugprone-use-after-move,
+-bugprone-virtual-near-miss,
+-clang-analyzer-apiModeling.StdCLibraryFunctions,
+-clang-analyzer-apiModeling.TrustNonnull,
+-clang-analyzer-apiModeling.google.GTest,
+-clang-analyzer-apiModeling.llvm.CastValue,
+-clang-analyzer-apiModeling.llvm.ReturnValue,
+-clang-analyzer-core.CallAndMessage,
+-clang-analyzer-core.DivideZero,
+-clang-analyzer-core.DynamicTypePropagation,
+-clang-analyzer-core.NonNullParamChecker,
+-clang-analyzer-core.NonnilStringConstants,
+-clang-analyzer-core.NullDereference,
+-clang-analyzer-core.StackAddrEscapeBase,
+-clang-analyzer-core.StackAddressEscape,
+-clang-analyzer-core.UndefinedBinaryOperatorResult,
+-clang-analyzer-core.VLASize,
+-clang-analyzer-core.builtin.BuiltinFunctions,
+-clang-analyzer-core.builtin.NoReturnFunctions,
+-clang-analyzer-core.uninitialized.ArraySubscript,
+-clang-analyzer-core.uninitialized.Assign,
+-clang-analyzer-core.uninitialized.Branch,
+-clang-analyzer-core.uninitialized.CapturedBlockVariable,
+-clang-analyzer-core.uninitialized.UndefReturn,
+-clang-analyzer-cplusplus.InnerPointer,
+-clang-analyzer-cplusplus.Move,
+-clang-analyzer-cplusplus.NewDelete,
+-clang-analyzer-cplusplus.NewDeleteLeaks,
+-clang-analyzer-cplusplus.PureVirtualCall,
+-clang-analyzer-cplusplus.SelfAssignment,
+-clang-analyzer-cplusplus.SmartPtr,
+-clang-analyzer-cplusplus.VirtualCallModeling,
+-clang-analyzer-deadcode.DeadStores,
+-clang-analyzer-fuchsia.HandleChecker,
+-clang-analyzer-nullability.NullPassedToNonnull,
+-clang-analyzer-nullability.NullReturnedFromNonnull,
+-clang-analyzer-nullability.NullabilityBase,
+-clang-analyzer-nullability.NullableDereferenced,
+-clang-analyzer-nullability.NullablePassedToNonnull,
+-clang-analyzer-nullability.NullableReturnedFromNonnull,
+-clang-analyzer-optin.cplusplus.UninitializedObject,
+-clang-analyzer-optin.cplusplus.VirtualCall,
+-clang-analyzer-optin.mpi.MPI-Checker,
+-clang-analyzer-optin.osx.OSObjectCStyleCast,
+-clang-analyzer-optin.osx.cocoa.localizability.EmptyLocalizationContextChecker,
+-clang-analyzer-optin.osx.cocoa.localizability.NonLocalizedStringChecker,
+-clang-analyzer-optin.performance.GCDAntipattern,
+-clang-analyzer-optin.performance.Padding,
+-clang-analyzer-optin.portability.UnixAPI,
+-clang-analyzer-osx.API,
+-clang-analyzer-osx.MIG,
+-clang-analyzer-osx.NSOrCFErrorDerefChecker,
+-clang-analyzer-osx.NumberObjectConversion,
+-clang-analyzer-osx.OSObjectRetainCount,
+-clang-analyzer-osx.ObjCProperty,
+-clang-analyzer-osx.SecKeychainAPI,
+-clang-analyzer-osx.cocoa.AtSync,
+-clang-analyzer-osx.cocoa.AutoreleaseWrite,
+-clang-analyzer-osx.cocoa.ClassRelease,
+-clang-analyzer-osx.cocoa.Dealloc,
+-clang-analyzer-osx.cocoa.IncompatibleMethodTypes,
+-clang-analyzer-osx.cocoa.Loops,
+-clang-analyzer-osx.cocoa.MissingSuperCall,
+-clang-analyzer-osx.cocoa.NSAutoreleasePool,
+-clang-analyzer-osx.cocoa.NSError,
+-clang-analyzer-osx.cocoa.NilArg,
+-clang-analyzer-osx.cocoa.NonNilReturnValue,
+-clang-analyzer-osx.cocoa.ObjCGenerics,
+-clang-analyzer-osx.cocoa.RetainCount,
+-clang-analyzer-osx.cocoa.RetainCountBase,
+-clang-analyzer-osx.cocoa.RunLoopAutoreleaseLeak,
+-clang-analyzer-osx.cocoa.SelfInit,
+-clang-analyzer-osx.cocoa.SuperDealloc,
+-clang-analyzer-osx.cocoa.UnusedIvars,
+-clang-analyzer-osx.cocoa.VariadicMethodTypes,
+-clang-analyzer-osx.coreFoundation.CFError,
+-clang-analyzer-osx.coreFoundation.CFNumber,
+-clang-analyzer-osx.coreFoundation.CFRetainRelease,
+-clang-analyzer-osx.coreFoundation.containers.OutOfBounds,
+-clang-analyzer-osx.coreFoundation.containers.PointerSizedValues,
+-clang-analyzer-security.FloatLoopCounter,
+-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
+-clang-analyzer-security.insecureAPI.SecuritySyntaxChecker,
+-clang-analyzer-security.insecureAPI.UncheckedReturn,
+-clang-analyzer-security.insecureAPI.bcmp,
+-clang-analyzer-security.insecureAPI.bcopy,
+-clang-analyzer-security.insecureAPI.bzero,
+-clang-analyzer-security.insecureAPI.decodeValueOfObjCType,
+-clang-analyzer-security.insecureAPI.getpw,
+-clang-analyzer-security.insecureAPI.gets,
+-clang-analyzer-security.insecureAPI.mkstemp,
+-clang-analyzer-security.insecureAPI.mktemp,
+-clang-analyzer-security.insecureAPI.rand,
+-clang-analyzer-security.insecureAPI.strcpy,
+clang-analyzer-security.insecureAPI.vfork,
+-clang-analyzer-unix.API,
+-clang-analyzer-unix.DynamicMemoryModeling,
+-clang-analyzer-unix.Malloc,
+-clang-analyzer-unix.MallocSizeof,
+-clang-analyzer-unix.MismatchedDeallocator,
+clang-analyzer-unix.Vfork,
+-clang-analyzer-unix.cstring.BadSizeArg,
+-clang-analyzer-unix.cstring.CStringModeling,
+-clang-analyzer-unix.cstring.NullArg,
+-clang-analyzer-valist.CopyToSelf,
+-clang-analyzer-valist.Uninitialized,
+-clang-analyzer-valist.Unterminated,
+-clang-analyzer-valist.ValistBase,
+cppcoreguidelines-avoid-c-arrays,
+-cppcoreguidelines-avoid-goto,
+-cppcoreguidelines-c-copy-assignment-signature,
+-cppcoreguidelines-explicit-virtual-functions,
+-cppcoreguidelines-init-variables,
+-cppcoreguidelines-narrowing-conversions,
+-cppcoreguidelines-no-malloc,
+-cppcoreguidelines-pro-type-const-cast,
+-cppcoreguidelines-pro-type-member-init,
+-cppcoreguidelines-slicing,
+-hicpp-avoid-goto,
+-hicpp-exception-baseclass,
+-misc-unused-alias-decls,
+-misc-unused-using-decls,
+modernize-avoid-bind,
+modernize-avoid-c-arrays,
+-modernize-deprecated-headers,
+-modernize-deprecated-ios-base-aliases,
+modernize-loop-convert,
+-modernize-make-shared,
+modernize-make-unique,
+-modernize-pass-by-value,
+-modernize-raw-string-literal,
+modernize-redundant-void-arg,
+-modernize-replace-auto-ptr,
+-modernize-replace-random-shuffle,
+-modernize-shrink-to-fit,
+-modernize-unary-static-assert,
+modernize-use-bool-literals,
+modernize-use-emplace,
+modernize-use-equals-default,
+-modernize-use-equals-delete,
+-modernize-use-noexcept,
+modernize-use-nullptr,
+modernize-use-override,
+-modernize-use-transparent-functors,
+-modernize-use-uncaught-exceptions,
+performance-faster-string-find,
+-performance-for-range-copy,
+-performance-implicit-conversion-in-loop,
+-performance-inefficient-algorithm,
+-performance-inefficient-string-concatenation,
+-performance-inefficient-vector-operation,
+-performance-move-const-arg,
+-performance-move-constructor-init,
+-performance-no-automatic-move,
+-performance-noexcept-move-constructor,
+-performance-trivially-destructible,
+-performance-type-promotion-in-math-fn,
+-performance-unnecessary-copy-initialization,
+readability-container-size-empty,
+'
+HeaderFilterRegex: '^(paddle/(?!cinn)).*$'
+AnalyzeTemporaryDtors: false
+WarningsAsErrors: '*'
+...
--- a/.cmake-format.py
+++ b/.cmake-format.py
 # Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-# 
+#
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
-# 
+#
 #     http://www.apache.org/licenses/LICENSE-2.0
-# 
+#
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -16,7 +16,6 @@
 # Options affecting formatting.
 # -----------------------------
 with section("format"):
    # How wide to allow formatted cmake files
    line_width = 80
@@ -50,12 +49,6 @@ with section("parse"):
                "DEPS": '*',
            }
        },
-        "hip_library": {
-            "kwargs": {
-                "SRCS": '*',
-                "DEPS": '*',
-            }
-        },
        "xpu_library": {
            "kwargs": {
                "SRCS": '*',
@@ -68,12 +61,6 @@ with section("parse"):
                "DEPS": '*',
            }
        },
-        "hip_library": {
-            "kwargs": {
-                "SRCS": '*',
-                "DEPS": '*',
-            }
-        },
        "go_library": {
            "kwargs": {
                "SRCS": '*',
@@ -121,5 +108,5 @@ with section("parse"):
                "SRCS": '*',
                "DEPS": '*',
            }
-        }
+        },
    }
--- a/.dockerignore
+++ b/.dockerignore
-*.DS_Store
-build/
-*.user
-.vscode
-.idea
-.project
-.cproject
-.pydevproject
-Makefile
-.test_env/
-third_party/
-*~
-bazel-*
-!build/*.deb
--- a/.editorconfig
+++ b/.editorconfig
+# EditorConfig is a cross-editor configuration file
+# that helps unify code styles across projects
+# on which multiple developers collaborate.
+# See more at https://editorconfig.org/
+root = true
+[*]
+indent_style = space
+end_of_line = lf
+charset = utf-8
+trim_trailing_whitespace = true
+insert_final_newline = true
+[*.{c,cc,cxx,cpp,cu,cuh,h,hpp,hxx,kps}]
+indent_size = 2
+[*.{py,java,r}]
+indent_size = 4
+[Dockerfile.*]
+indent_size = 4
+[.flake8]
+indent_size = 4
+[*.go]
+indent_style = tab
+indent_size = 4
--- a/.flake8
+++ b/.flake8
+[flake8]
+select = C,E,W
+exclude =
+    ./build,
+    # Exclude fluid directory
+    ./python/paddle/fluid/**,
+    # Exclude third-party libraries
+    ./third_party/**,
+    ./python/paddle/utils/gast/**,
+ignore =
+    # Whitespace before ‘,’, ‘;’, or ‘:’, it is not compatible with black
+    E203,
+    # Module level import not at top of file
+    E402,
+    # Line too long (82 > 79 characters)
+    E501,
+    # Do not compare types, use `isinstance()`
+    E721,
+    # Do not use bare except, specify exception instead
+    E722,
+    # Do not assign a lambda expression, use a def
+    E731,
+    # Do not use variables named ‘l’, ‘O’, or ‘I’
+    E741,
+    # Line break before binary operator, it is not compatible with black
+    W503
+per-file-ignores =
+    # These files need tabs for testing.
+    test/dygraph_to_static/test_error.py:E101,W191
--- a/.gitmodules
+++ b/.gitmodules
+[submodule "third_party/protobuf"]
+	path = third_party/protobuf
+	url = https://github.com/protocolbuffers/protobuf.git
+	ignore = dirty
+[submodule "third_party/pocketfft"]
+	path = third_party/pocketfft
+	url = https://gitlab.mpcdf.mpg.de/mtr/pocketfft.git
+	ignore = dirty
+[submodule "third_party/gflags"]
+	path = third_party/gflags
+	url = https://github.com/gflags/gflags.git
+	ignore = dirty
+[submodule "third_party/gloo"]
+	path = third_party/gloo
+	url = https://github.com/ziyoujiyi/gloo.git
+	ignore = dirty
+[submodule "third_party/dlpack"]
+	path = third_party/dlpack
+	url = https://github.com/dmlc/dlpack.git
+	ignore = dirty
+[submodule "third_party/utf8proc"]
+	path = third_party/utf8proc
+	url = https://github.com/JuliaStrings/utf8proc.git
+	ignore = dirty
+[submodule "third_party/warpctc"]
+	path = third_party/warpctc
+	url = https://github.com/baidu-research/warp-ctc.git
+	ignore = dirty
+[submodule "third_party/warprnnt"]
+	path = third_party/warprnnt
+	url = https://github.com/PaddlePaddle/warp-transducer.git
+	ignore = dirty
+[submodule "third_party/xxhash"]
+	path = third_party/xxhash
+	url = https://github.com/Cyan4973/xxHash.git
+	ignore = dirty
+[submodule "third_party/pybind"]
+	path = third_party/pybind
+	url = https://github.com/pybind/pybind11.git
+	ignore = dirty
+[submodule "third_party/threadpool"]
+	path = third_party/threadpool
+	url = https://github.com/progschj/ThreadPool.git
+	ignore = dirty
+[submodule "third_party/zlib"]
+	path = third_party/zlib
+	url = https://github.com/madler/zlib.git
+	ignore = dirty
+[submodule "third_party/glog"]
+	path = third_party/glog
+	url = https://github.com/google/glog.git
+	ignore = dirty
+[submodule "third_party/eigen3"]
+	path = third_party/eigen3
+	url = https://gitlab.com/libeigen/eigen.git
+	ignore = dirty
+[submodule "third_party/snappy"]
+	path = third_party/snappy
+	url = https://github.com/google/snappy.git
+	ignore = dirty
+[submodule "third_party/cub"]
+	path = third_party/cub
+	url = https://github.com/NVIDIA/cub.git
+	ignore = dirty
+[submodule "third_party/cutlass"]
+	path = third_party/cutlass
+	url = https://github.com/NVIDIA/cutlass.git
+	ignore = dirty
+[submodule "third_party/xbyak"]
+	path = third_party/xbyak
+	url = https://github.com/herumi/xbyak.git
+	ignore = dirty
+[submodule "third_party/mkldnn"]
+	path = third_party/mkldnn
+	url = https://github.com/oneapi-src/oneDNN.git
+	ignore = dirty
+[submodule "third_party/flashattn"]
+	path = third_party/flashattn
+	url = https://github.com/PaddlePaddle/flash-attention.git
+	ignore = dirty
+[submodule "third_party/gtest"]
+	path = third_party/gtest
+	url = https://github.com/google/googletest.git
+	ignore = dirty
+[submodule "third_party/openblas"]
+	path = third_party/openblas
+	url = https://github.com/xianyi/OpenBLAS.git
+	ignore = dirty
+[submodule "third_party/leveldb"]
+	path = third_party/leveldb
+	url = https://github.com/google/leveldb.git
+	ignore = dirty
+[submodule "third_party/brpc"]
+	path = third_party/brpc
+	url = https://github.com/apache/brpc.git
+	ignore = dirty
+[submodule "third_party/rocksdb"]
+	path = third_party/rocksdb
+	url = https://github.com/Thunderbrook/rocksdb
+	ignore = dirty
+[submodule "third_party/absl"]
+	path = third_party/absl
+	url = https://github.com/abseil/abseil-cpp.git
+	ignore = dirty
+[submodule "third_party/jitify"]
+	path = third_party/jitify
+	url = https://github.com/NVIDIA/jitify.git
+	ignore = dirty
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
+# Exclude all third-party libraries and auto-generated files globally
+exclude: |
+    (?x)^(
+        patches/.+|
+        paddle/fluid/framework/fleet/heter_ps/cudf/.+|
+        paddle/fluid/distributed/ps/thirdparty/round_robin.h|
+        python/paddle/utils/gast/.+|
+        third_party/.+
+    )$
 repos:
-   repo: https://github.com/Lucas-C/pre-commit-hooks.git
+# Common hooks
-    rev: v1.1.14
-    hooks:
-    -   id: remove-crlf
-        files: (?!.*third_party)^.*$ | (?!.*book)^.*$
-   repo: https://github.com/psf/black.git
-    rev: 22.8.0
-    hooks:
-    -   id: black
-        files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
-        exclude: |
-            (?x)^(
-                python/paddle/fluid/tests/unittests/dygraph_to_static/test_error.py|
-                python/paddle/fluid/tests/unittests/dygraph_to_static/test_origin_info.py
-            )$
 -   repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.1.0
+    rev: v4.4.0
    hooks:
    -   id: check-added-large-files
    -   id: check-merge-conflict
    -   id: check-symlinks
    -   id: detect-private-key
-        files: (?!.*third_party)^.*$ | (?!.*book)^.*$
    -   id: end-of-file-fixer
    -   id: sort-simple-yaml
-        files: (op|backward|op_[a-z_]+)\.yaml$
+        files: (ops|backward|op_[a-z_]+)\.yaml$
-   repo: local
+    -   id: trailing-whitespace
+        files: (.*\.(py|bzl|md|rst|c|cc|cxx|cpp|cu|h|hpp|hxx|xpu|kps|cmake|yaml|yml|hook)|BUILD|.*\.BUILD|WORKSPACE|CMakeLists\.txt)$
+-   repo: https://github.com/Lucas-C/pre-commit-hooks.git
+    rev: v1.5.1
    hooks:
-    -   id: clang-format
+    -   id: remove-crlf
-        name: clang-format
+    -   id: remove-tabs
-        description: Format files with ClangFormat.
+        name: Tabs remover (C++)
-        entry: bash ./tools/codestyle/clang_format.hook -i
-        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|xpu|kps)$
+        args: [--whitespaces-count, '2']
+    -   id: remove-tabs
+        name: Tabs remover (Python)
+        files: (.*\.(py|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
+        args: [--whitespaces-count, '4']
+        # Exclude some unit test files that require tabs.
        exclude: |
            (?x)^(
-                paddle/fluid/distributed/ps/thirdparty/round_robin.h
+                test/dygraph_to_static/test_error.py
            )$
 -   repo: local
    hooks:
-    -   id: cpplint-cpp-source
+    -   id: copyright_checker
-        name: cpplint
+        name: copyright_checker
-        description: Check C++ code style using cpplint.py.
+        entry: python ./tools/codestyle/copyright.hook
-        entry: bash ./tools/codestyle/cpplint_pre_commit.hook
        language: system
-        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|xpu|kps|py|sh)$
+        exclude: |
+            (?x)^(
+                paddle/utils/.*|
+                paddle/cinn/utils/registry.h
+            )$
+# For Python files
+-   repo: https://github.com/psf/black.git
+    rev: 23.3.0
+    hooks:
+    -   id: black
+        files: (.*\.(py|pyi|bzl)|BUILD|.*\.BUILD|WORKSPACE)$
+-   repo: https://github.com/pycqa/isort
+    rev: 5.11.5
+    hooks:
+    -   id: isort
+-   repo: https://github.com/PyCQA/flake8
+    rev: 5.0.4
+    hooks:
+    -   id: flake8
+        args: ["--config=.flake8"]
+-   repo: https://github.com/astral-sh/ruff-pre-commit
+    rev: v0.0.272
+    hooks:
+    -   id: ruff
+        args: [--fix, --exit-non-zero-on-fix, --no-cache]
 -   repo: local
    hooks:
    -   id: pylint-doc-string
@@ -53,17 +78,66 @@ repos:
        entry: bash ./tools/codestyle/pylint_pre_commit.hook
        language: system
        files: \.(py)$
+# For C++ files
 -   repo: local
    hooks:
-    -   id: copyright_checker
+    -   id: clang-format
-        name: copyright_checker
+        name: clang-format
-        entry: python ./tools/codestyle/copyright.hook
+        description: Format files with ClangFormat.
+        entry: bash ./tools/codestyle/clang_format.hook -i
        language: system
-        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|xpu|kps|py|sh)$
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|xpu|kps)$
-        exclude: |
+-   repo: local
+    hooks:
+    -   id: cpplint-cpp-source
+        name: cpplint
+        description: Check C++ code style using cpplint.py.
+        entry: bash ./tools/codestyle/cpplint_pre_commit.hook
+        language: system
+        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx)$
+        args:
+            - --extensions=c,cc,cxx,cpp,cu,cuh,h,hpp,hxx,kps
+            - --filter=-readability/fn_size,-build/include_what_you_use,-build/c++11,-whitespace/parens
+            - --quiet
+        # Exclude third-party libraries
+        exclude:  |
            (?x)^(
-                paddle/utils/.*
+                paddle/utils/flat_hash_map\.h
            )$
+-   repo: local
+    hooks:
+    -   id: clang-tidy
+        name: clang-tidy
+        description: Parallel clang-tidy runner.
+        entry: python ./tools/codestyle/clang-tidy.py
+        language: system
+        files: \.(c|cc|cxx|cpp|h|hpp|hxx)$
+        args:
+            - -p=build/
+            - -extra-arg=-Wno-unknown-warning-option
+            - -extra-arg=-Wno-pessimizing-move
+            - -extra-arg=-Wno-braced-scalar-init
+            - -extra-arg=-Wno-dangling-gsl
+            - -extra-arg=-Wno-deprecated-copy
+            - -extra-arg=-Wno-final-dtor-non-final-class
+            - -extra-arg=-Wno-implicit-int-float-conversion
+            - -extra-arg=-Wno-inconsistent-missing-override
+            - -extra-arg=-Wno-infinite-recursion
+            - -extra-arg=-Wno-mismatched-tags
+            - -extra-arg=-Wno-self-assign
+            - -extra-arg=-Wno-sign-compare
+            - -extra-arg=-Wno-sometimes-uninitialized
+            - -extra-arg=-Wno-tautological-overlap-compare
+            - -extra-arg=-Wno-unused-const-variable
+            - -extra-arg=-Wno-unused-lambda-capture
+            - -extra-arg=-Wno-unused-private-field
+            - -extra-arg=-Wno-unused-value
+            - -extra-arg=-Wno-unused-variable
+            - -extra-arg=-Wno-overloaded-virtual
+            - -extra-arg=-Wno-defaulted-function-deleted
+            - -extra-arg=-Wno-delete-non-abstract-non-virtual-dtor
+            - -extra-arg=-Wno-return-type-c-linkage
+# For CMake files
 -   repo: local
    hooks:
    -   id: auto-generate-cmakelists
@@ -81,7 +155,6 @@ repos:
            (?x)^(
                paddle/fluid/operators/CMakeLists.txt
            )$
 -   repo: https://github.com/cmake-lint/cmake-lint
    rev: 1.4.2
    hooks:

--- a/AUTHORS.md
+++ b/AUTHORS.md
+This is an incomplete list of authors of the [Paddle](https://github.com/PaddlePaddle/Paddle/) codebase; to see the full list, please use the source control tool git. The PaddlePaddle community encourages every Paddle codebase author to include his/her GitHub account and full name here.
 | Github account | name |
 |---|---|
 | abhinavarora | Abhinav Arora |
@@ -9,6 +12,7 @@
 | beckett1124 | Bin Qi |
 | ChengduoZH | Cheng-Duo Zhao|
 | chengxiaohua1105 | Xiao-Hua Cheng |
+| chenwhql | Wei-Hang Chen |
 | cxwangyi, yiwangbaidu, wangkuiyi | Yi Wang |
 | cxysteven | Xing-Yi Cheng |
 | ddokupil | Dariusz Dokupil |
@@ -16,6 +20,7 @@
 | dragonwarrior | Long Wang |
 | dyning | Yuning Du |
 | emailweixu | Wei Xu |
+| engineer1109 | Jia-Liang Wang |
 | gangliao | Gang Liao |
 | gongweibao | Wei-Bao Gong |
 | guru4elephant | Daxiang Dong |
@@ -24,9 +29,13 @@
 | Haichao-Zhang | Hai-Chao Zhang |
 | hedaoyuan | Dao-Yuan He |
 | helinwang | He-Lin Wang |
+| heliqi | Li-Qi He |
+| houj04 | HOU Jue |
+| HulekJakub | Jakub Hulek |
 | jacquesqiao | Long-Fei Qiao |
 | [jakpiase](https://raw.githubusercontent.com/jakpiase/Paddle/new_paddle_intel_authors/img/img.jpg) | Jakub Piasecki |
 | [jczaja](https://raw.githubusercontent.com/jakpiase/Paddle/new_paddle_intel_authors/img/img.jpg) | Jacek Czaja |
+| jiahy0825 | Hongyu Jia |
 | JiayiFeng | Jia-Yi Feng |
 | kbinias | Krzysztof Binias |
 | kexinzhao | Ke-Xin Zhao |
@@ -79,9 +88,39 @@
 | xushaoyong | Shao-Yong Xu |
 | Yancey1989 | Xu Yan |
 | zhaopu7 | Pu Zhao |
+| zhiqiu | Qiu-Liang Chen |
 | zhouxiao-coder | Xiao Zhou |
 | Zrachel | Rui-Qing Zhang |
 | jeng1220 | Bai-Cheng(Ryan) Jeng (NVIDIA) |
 | mingxu1067 | Ming Huang (NVIDIA) |
 | zlsh80826 | Reese Wang (NVIDIA) |
 | leo0519 | Leo Chen (NVIDIA) |
+| jzhang533 | Jun Zhang |
+| Ligoml | Meng-Liu Li |
+| jeff41404 | Xiang Gao |
+| zh794390558 | Hui Zhang |
+| limin2021 | Min Li |
+| zhouwei25 | Wei Zhou |
+| littletomatodonkey | Ruo-Yu Guo |
+| zhupengyang | Zhu Pengyang |
+| DesmonDay | Siming Dai |
+| thisjiang | jiangcheng |
+| yghstill | Guanghua Yu |
+| CtfGo | Tefeng Chen |
+| ZHUI | Hui Zhong|
+| LemonNoel | Huijuan Wang |
+| wawltor | Zeyang Fang |
+| FrostML | Zheng-Xi Liu |
+| jiangjiajun | jiangjiajun |
+| dingjiaweiww | dingjiawei |
+| gglin001 | Allen Guo (Graphcore) |
+| yaozhixin | Zhixin Yao (Graphcore) |
+| XBWGC | Xiaobing Wang (Graphcore) |
+| jianghaicheng | Haicheng Jiang (Graphcore) |
+| czr-gc | Zhaorui Chen (Graphcore) |
+| zhao-han | Han Zhao (Graphcore) |
+| yiakwy, yiakwy-xpu-ml-framework-team | Yi Wang (Graphcore) |
+| [Yulv-git](https://github.com/Yulv-git) | Shuangchi He |
+| [zrr1999](https://github.com/zrr1999) | Rongrui Zhan |
+| [will-jl944](https://github.com/will-jl944) | Jiafeng Lu |
+| [gouzil](https://github.com/gouzil) | Chuan Tian |
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -23,6 +23,7 @@ endif()
 # use to get_property location of static lib
 # https://cmake.org/cmake/help/v3.0/policy/CMP0026.html?highlight=cmp0026
 cmake_policy(SET CMP0026 OLD)
+cmake_policy(SET CMP0079 NEW)
 set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake")
 set(PADDLE_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
 set(PADDLE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR})
@@ -47,20 +48,30 @@ find_package(CUDA QUIET)
 find_package(MKL CONFIG QUIET)
 option(WITH_ONEMKL "Compile PaddlePaddle with oneMKL" OFF)
 option(WITH_GPU "Compile PaddlePaddle with NVIDIA GPU" ${CUDA_FOUND})
+option(WITH_MPI "Compile PaddlePaddle with MPI" OFF)
 option(WITH_TENSORRT "Compile PaddlePaddle with NVIDIA TensorRT" OFF)
 option(WITH_XPU "Compile PaddlePaddle with BAIDU KUNLUN XPU" OFF)
 option(WITH_XPU_KP "Compile PaddlePaddle with BAIDU XPU compiler " OFF)
-option(WITH_MLU "Compile PaddlePaddle with CAMBRICON MLU" OFF)
+option(WITH_XPU_XFT "Compile PaddlePaddle with BAIDU XPU-XFT" OFF)
+option(WITH_XPU_PLUGIN "Compile PaddlePaddle with BAIDU XPU plugin" OFF)
 option(WITH_WIN_DUMP_DBG "Compile with windows core dump debug mode" OFF)
-option(WITH_ASCEND "Compile PaddlePaddle with ASCEND" OFF)
 option(WITH_ROCM "Compile PaddlePaddle with ROCM platform" OFF)
 option(WITH_IPU "Compile PaddlePaddle with Graphcore IPU" OFF)
-# NOTE(zhiqiu): WITH_ASCEND_CL can be compile on x86_64, so we can set WITH_ASCEND=OFF and WITH_ASCEND_CL=ON
-# to develop some acl related functionality on x86
-option(WITH_ASCEND_CL "Compile PaddlePaddle with ASCEND CL" ${WITH_ASCEND})
-option(WITH_ASCEND_CXX11 "Compile PaddlePaddle with ASCEND and CXX11 ABI" OFF)
 option(WITH_ONNXRUNTIME "Compile PaddlePaddle with ONNXRUNTIME" OFF)
 option(WITH_CUSPARSELT "Compile PaddlePaddle with CUSPARSELT" OFF)
+option(WITH_SETUP_INSTALL "Compile PaddlePaddle with setup.py" OFF)
+option(WITH_SHARED_PHI "Compile PaddlePaddle with SHARED LIB of PHI" OFF)
+option(CINN_ONLY "Compile CINN only in Paddle" OFF)
+option(CINN_WITH_CUDNN "Compile CINN with CUDNN support" ON)
+find_package(Git REQUIRED)
+# GIT_URL is the base URL used to clone dependent repos; point it at a GitHub
+# mirror (e.g. -DGIT_URL=<mirror>) to speed up cloning. It holds a string, so
+# it is declared with set(... CACHE STRING) rather than option(), which is
+# boolean-only and would record the unset default as OFF.
+if(NOT GIT_URL)
+  # Unset, empty, or a stale boolean OFF left in an existing cache: fall back
+  # to the default host.
+  set(GIT_URL "https://github.com")
+endif()
+# Without FORCE this never overwrites a value the user already put in the
+# cache; it only creates the entry (or adds the STRING type to an untyped one).
+set(GIT_URL
+    "${GIT_URL}"
+    CACHE STRING "Git URL to clone dependent repos")
 # Note(zhouwei): It use option above, so put here
 include(init)
 include(generic) # simplify cmake module
@@ -72,15 +83,12 @@ endif()
 if(WITH_GPU AND WITH_XPU_KP)
  message(FATAL_ERROR "Error when compile GPU and XPU2 at the same time")
 endif()
-if(WITH_GPU AND WITH_ASCEND)
+if(WITH_GPU AND WITH_XPU_XFT)
-  message(FATAL_ERROR "Error when compile GPU and ASCEND at the same time")
+  message(FATAL_ERROR "Error when compile GPU and XPU-XFT at the same time")
 endif()
 if(WITH_GPU AND WITH_ROCM)
  message(FATAL_ERROR "Error when compile CUDA and ROCM at the same time")
 endif()
-if(WITH_GPU AND WITH_MLU)
-  message(FATAL_ERROR "Error when compile GPU and MLU at the same time")
-endif()
 if(WITH_GPU AND NOT APPLE)
  enable_language(CUDA)
@@ -94,6 +102,11 @@ message(STATUS "C compiler: ${CMAKE_C_COMPILER}, version: "
               "${CMAKE_C_COMPILER_ID} ${CMAKE_C_COMPILER_VERSION}")
 message(STATUS "AR tools: ${CMAKE_AR}")
+# With GCC newer than 10.4, append -Wno-error=uninitialized to the C++ flags
+# so that "uninitialized" diagnostics are not promoted to errors.
+if(CMAKE_CXX_COMPILER_ID STREQUAL "GNU"
+   AND CMAKE_CXX_COMPILER_VERSION VERSION_GREATER 10.4)
+  string(APPEND CMAKE_CXX_FLAGS " -Wno-error=uninitialized")
+endif()
 # MUSL build turn off warnings
 if(WITH_MUSL)
  set(CMAKE_CXX_FLAGS
@@ -106,17 +119,9 @@ if(APPLE AND WITH_ARM)
  set(CMAKE_CXX_FLAGS "${CMAKE_C_FLAGS} -target arm64-apple-darwin")
 endif()
-if(WITH_ASCEND_CL AND NOT WITH_ASCEND_CXX11)
-  if(WITH_ARM_BRPC)
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=1")
-  else()
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0")
-  endif()
-endif()
 if(WIN32)
  option(MSVC_STATIC_CRT "use static C Runtime library by default" ON)
+  message("Build static library of PHI")
  set(CMAKE_SUPPRESS_REGENERATION ON)
  set(CMAKE_STATIC_LIBRARY_PREFIX lib)
@@ -233,14 +238,6 @@ else()
  )
 endif()
-find_package(Git REQUIRED)
-# config GIT_URL with github mirrors to speed up dependent repos clone
-option(GIT_URL "Git URL to clone dependent repos" ${GIT_URL})
-if(NOT GIT_URL)
-  set(GIT_URL "https://github.com")
-endif()
 find_package(Threads REQUIRED)
 include(simd)
@@ -256,6 +253,7 @@ option(WITH_DISTRIBUTE "Compile with distributed support" OFF)
 option(WITH_BRPC_RDMA "Use brpc rdma as the rpc protocal" OFF)
 option(ON_INFER "Turn on inference optimization and inference-lib generation"
       ON)
+option(WITH_CPP_DIST "Install PaddlePaddle C++ distribution" OFF)
 ################################ Internal Configurations #######################################
 option(WITH_NV_JETSON "Compile PaddlePaddle with NV JETSON" OFF)
 option(WITH_PROFILER "Compile PaddlePaddle with GPU profiler and gperftools"
@@ -268,11 +266,11 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
 option(WITH_PSLIB "Compile with pslib support" OFF)
 option(WITH_BOX_PS "Compile with box_ps support" OFF)
 option(WITH_XBYAK "Compile with xbyak support" ON)
-option(WITH_CONTRIB "Compile the third-party contributation" OFF)
 option(WITH_PSCORE "Compile with parameter server support" ${WITH_DISTRIBUTE})
-option(WITH_HETERPS "Compile with heterps" OFF})
+option(WITH_HETERPS "Compile with heterps" OFF)
 option(WITH_INFERENCE_API_TEST
       "Test fluid inference C++ high-level api interface" OFF)
+option(WITH_NVTX "Paddle with nvtx for profiler" OFF)
 option(PY_VERSION "Compile PaddlePaddle with python3 support" ${PY_VERSION})
 option(WITH_DGC "Use DGC(Deep Gradient Compression) or not" ${WITH_DISTRIBUTE})
 option(
@@ -281,15 +279,14 @@ option(
  OFF)
 option(WITH_LITE "Compile Paddle Fluid with Lite Engine" OFF)
 option(WITH_CINN "Compile PaddlePaddle with CINN" OFF)
-option(WITH_INFRT "Compile PaddlePaddle with INFRT" OFF)
 option(WITH_NCCL "Compile PaddlePaddle with NCCL support" ON)
 option(WITH_RCCL "Compile PaddlePaddle with RCCL support" ON)
 option(WITH_XPU_BKCL "Compile PaddlePaddle with BAIDU KUNLUN XPU BKCL" OFF)
-option(WITH_CNCL "Compile PaddlePaddle with CNCL support" OFF)
 option(WITH_CRYPTO "Compile PaddlePaddle with crypto support" ON)
 option(WITH_ARM "Compile PaddlePaddle with arm support" OFF)
 option(WITH_SW "Compile PaddlePaddle with sw support" OFF)
 option(WITH_MIPS "Compile PaddlePaddle with mips support" OFF)
+option(WITH_LOONGARCH "Compile PaddlePaddle with loongarch support" OFF)
 option(WITH_MUSL "Compile with musl libc instead of gblic" OFF)
 option(WITH_UNITY_BUILD "Compile with UnityBuild mode" OFF)
 option(WITH_STRIP "Strip so files of Whl packages" OFF)
@@ -300,13 +297,18 @@ option(NEW_RELEASE_ALL
       OFF)
 option(NEW_RELEASE_JIT
       "PaddlePaddle next-level release strategy for backup jit package" OFF)
-option(WITH_ASCEND_INT64 "Compile with int64 kernel for ascend NPU" OFF)
 option(WITH_POCKETFFT "Compile with pocketfft support" ON)
 option(WITH_RECORD_BUILDTIME
       "Compile PaddlePaddle with record all targets build time" OFF)
 option(WITH_CUSTOM_DEVICE "Compile with custom device support" OFF)
 option(WITH_ARM_BRPC "Supprot Brpc in Arm" OFF)
 option(WITH_FLPS "FL PS mode" OFF)
+option(WITH_RPC "Compile with rpc support" ${WITH_DISTRIBUTE})
+option(WITH_CUDNN_FRONTEND
+       "Compile with CUDNN Frontend API support (experimental)" OFF)
+option(WITH_CUDNN_DSO "Compile PaddlePaddle with cuDNN dynamic-link libraries"
+       OFF)
+option(WITH_SHARED_IR "Compile PaddlePaddle with SHARED LIB of IR" ON)
 if(WITH_RECORD_BUILDTIME)
  set_property(
@@ -329,7 +331,7 @@ unset(WITH_RECORD_BUILDTIME CACHE)
 # PY_VERSION
 if(NOT PY_VERSION)
-  set(PY_VERSION 3.6)
+  set(PY_VERSION 3.7)
 endif()
 set(PYBIND11_PYTHON_VERSION ${PY_VERSION})
@@ -342,6 +344,11 @@ endif()
 if(LINUX
   AND NOT WITH_CUSTOM_DEVICE
+   AND NOT WITH_GPU
+   AND NOT WITH_ROCM
+   AND NOT WITH_XPU
+   AND NOT WITH_XPU_KP
+   AND NOT WITH_XPU_XFT
   AND WITH_PYTHON)
  set(WITH_CUSTOM_DEVICE
      ON
@@ -390,6 +397,16 @@ if(NOT WITH_GPU AND WITH_NCCL)
      CACHE STRING "Disable NCCL when compiling without GPU" FORCE)
 endif()
+if(NOT WITH_GPU AND WITH_CUDNN_DSO)
+  message(
+    WARNING
+      "Can't compile with cuDNN libraries when compiling without GPU. Force WITH_CUDNN_DSO=OFF."
+  )
+  set(WITH_CUDNN_DSO
+      OFF
+      CACHE STRING "Disable cuDNN libraries when compiling without GPU" FORCE)
+endif()
 # force WITH_XPU on when WITH_XPU_KP
 if(WITH_XPU_KP AND NOT WITH_XPU)
  message(
@@ -400,20 +417,29 @@ if(WITH_XPU_KP AND NOT WITH_XPU)
      CACHE STRING "Enable WITH_XPU when compiling with WITH_XPU_KP" FORCE)
 endif()
-if(NOT WITH_XPU AND WITH_XPU_BKCL)
+if(NOT WITH_XPU AND WITH_XPU_XFT)
  message(
-    WARNING "Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF.")
+    WARNING
-  set(WITH_XPU_BKCL
+      "Enable WITH_XPU when compiling with WITH_XPU_XFT. Force WITH_XPU=ON.")
+  set(WITH_XPU
+      ON
+      CACHE STRING "Enable WITH_XPU when compiling with WITH_XPU_XFT" FORCE)
+endif()
+if(NOT WITH_XPU AND WITH_XPTI)
+  message(
+    WARNING "Disable XPTI when compiling without XPU. Force WITH_XPTI=OFF.")
+  set(WITH_XPTI
      OFF
-      CACHE STRING "Disable BKCL when compiling without XPU" FORCE)
+      CACHE STRING "Disable XPTI when compiling without XPU" FORCE)
 endif()
-if(NOT WITH_MLU AND WITH_CNCL)
+if(NOT WITH_XPU AND WITH_XPU_BKCL)
  message(
-    WARNING "Disable CNCL when compiling without MLU. Force WITH_MLU=OFF.")
+    WARNING "Disable BKCL when compiling without XPU. Force WITH_XPU_BKCL=OFF.")
-  set(WITH_MLU
+  set(WITH_XPU_BKCL
      OFF
-      CACHE STRING "Disable CNCL when compiling without MLU" FORCE)
+      CACHE STRING "Disable BKCL when compiling without XPU" FORCE)
 endif()
 if(WITH_NCCL)
@@ -447,12 +473,7 @@ if(WITH_GPU)
  endif()
 endif()
-if(WITH_MLU)
-  include(neuware)
-endif()
 if(WITH_ROCM)
-  add_definitions(-D__CUDA_HIP_PLATFORM_AMD__)
  include(hip)
  include(miopen) # set miopen libraries, must before configure
  include(cupti)
@@ -492,15 +513,6 @@ if(WITH_DISTRIBUTE)
        ON
        CACHE STRING "Enable GLOO when compiling WITH_DISTRIBUTE=ON." FORCE)
  endif()
-  if(WITH_ASCEND_CL AND NOT WITH_ARM_BRPC)
-    # disable WITH_PSCORE for NPU before include third_party
-    message(
-      WARNING
-        "Disable WITH_PSCORE when compiling with NPU. Force WITH_PSCORE=OFF.")
-    set(WITH_PSCORE
-        OFF
-        CACHE BOOL "Disable WITH_PSCORE when compiling with NPU" FORCE)
-  endif()
  if(WITH_ROCM AND HIP_VERSION LESS_EQUAL 40020496)
    # TODO(qili93): third-party rocksdb throw Illegal instruction with HIP version 40020496
    message(
@@ -516,11 +528,92 @@ if(WITH_DISTRIBUTE)
  endif()
 endif()
+if(WITH_RPC)
+  if(NOT LINUX)
+    message(
+      WARNING "Disable WITH_RPC when not compiled on Linux. Force WITH_RPC=OFF."
+    )
+    set(WITH_RPC
+        OFF
+        CACHE BOOL "Disable WITH_RPC when not compiled on Linux" FORCE)
+  endif()
+  if(NOT WITH_DISTRIBUTE AND WITH_RPC)
+    message(
+      WARNING
+        "Disable WITH_RPC when not compiled with distribute. Force WITH_RPC=OFF."
+    )
+    set(WITH_RPC
+        OFF
+        CACHE BOOL "Disable WITH_RPC when not compiled with distribute" FORCE)
+  endif()
+  if(WITH_ROCM AND WITH_RPC)
+    message(
+      WARNING "Disable WITH_RPC when compiling with ROCM. Force WITH_RPC=OFF.")
+    set(WITH_RPC
+        OFF
+        CACHE BOOL "Disable WITH_RPC when compiling with ROCM" FORCE)
+  endif()
+  if(WITH_XPU AND WITH_RPC)
+    message(
+      WARNING "Disable WITH_RPC when compiling with XPU. Force WITH_RPC=OFF.")
+    set(WITH_RPC
+        OFF
+        CACHE BOOL "Disable WITH_RPC when compiling with XPU" FORCE)
+  endif()
+  if(WITH_CINN AND WITH_RPC)
+    message(
+      WARNING "Disable WITH_RPC when compiling with CINN. Force WITH_RPC=OFF.")
+    set(WITH_RPC
+        OFF
+        CACHE BOOL "Disable WITH_RPC when compiling with CINN" FORCE)
+  endif()
+endif()
+if(WITH_MPI)
+  include(mpi)
+endif()
 include(third_party
 )# download, build, install third_party, Contains about 20+ dependencies
 include(flags) # set paddle compile flags
+#------------- cinn cmake config start --------------
+if(WITH_CINN)
+  message(STATUS "Compile Paddle with CINN.")
+  message(
+    WARNING
+      "Enable WITH_SHARED_PHI when compiling with CINN. Force WITH_SHARED_PHI=ON."
+  )
+  set(WITH_SHARED_PHI
+      ON
+      CACHE BOOL "Enable WITH_SHARED_PHI when compiling with CINN" FORCE)
+  # TODO(6clc): Use CINN_WITH_CUDNN to completely replace WITH_CUDNN in CINN.
+  #             Use WITH_GPU to completely replace WITH_CUDA in CINN.
+  set(WITH_MKL_CBLAS ${WITH_MKL})
+  if(WITH_GPU)
+    set(WITH_CUDA ${WITH_GPU})
+    add_definitions(-DCINN_WITH_CUDA)
+    set(WITH_CUDNN ${CINN_WITH_CUDNN})
+    if(WITH_CUDNN)
+      add_definitions(-DCINN_WITH_CUDNN)
+    endif()
+  endif()
+  include(cmake/cinn.cmake)
+  add_definitions(-DPADDLE_WITH_CINN)
+  if(CINN_ONLY)
+    if(WITH_PYTHON)
+      add_subdirectory(python)
+    endif()
+    add_subdirectory(test)
+    return()
+  endif()
+endif()
+#------------- cinn cmake config end --------------
 if(WITH_PROFILER)
  find_package(Gperftools REQUIRED)
  include_directories(${GPERFTOOLS_INCLUDE_DIR})
@@ -575,6 +668,20 @@ if(WITH_MIPS)
  add_definitions(-DPADDLE_WITH_MIPS)
 endif()
+if(WITH_NVTX AND NOT WIN32)
+  add_definitions(-DPADDLE_WITH_NVTX)
+endif()
+if(WITH_LOONGARCH)
+  set(WITH_XBYAK
+      OFF
+      CACHE STRING "Disable XBYAK when compiling WITH_LOONGARCH=ON" FORCE)
+  set(WITH_MKL
+      OFF
+      CACHE STRING "Disable MKL when compiling WITH_LOONGARCH=ON." FORCE)
+  add_definitions(-DPADDLE_WITH_LOONGARCH)
+endif()
 if(WITH_ONEMKL)
  add_definitions(-DPADDLE_WITH_ONEMKL)
 endif()
@@ -597,6 +704,9 @@ if(ON_INFER)
    STATUS "On inference mode, will take place some specific optimization.")
  include(inference_lib)
  add_definitions(-DPADDLE_ON_INFERENCE)
+  set(WITH_SHARED_IR
+      OFF
+      CACHE BOOL "Only paddle_inference.so is allowed in inference." FORCE)
 else()
  #TODO(luotao), combine this warning with `make inference_lib_dist` command.
  message(
@@ -605,6 +715,10 @@ else()
  )
 endif()
+if(NOT WITH_SHARED_IR)
+  add_definitions(-DSTATIC_IR)
+endif()
 if(WITH_STRIP)
  find_program(STRIP_PATH strip)
  if(NOT STRIP_PATH OR NOT LINUX)
@@ -615,10 +729,26 @@ if(WITH_STRIP)
  endif()
 endif()
+if(WITH_CPP_DIST)
+  # TODO(huangjiyi): Separate installing C++ distribution from python package
+  # installation and support for installing C++ distribution on more platforms.
+  if(NOT LINUX OR NOT WITH_PYTHON)
+    set(WITH_CPP_DIST
+        OFF
+        CACHE
+          STRING
+          "Currently C++ Distribution Generation is only available on Linux and compiling WITH_PYTHON=ON."
+          FORCE)
+  else()
+    include(paddle_lib)
+  endif()
+endif()
 add_subdirectory(paddle)
 if(WITH_PYTHON)
  add_subdirectory(python)
 endif()
+add_subdirectory(test)
 get_directory_property(all_inc_dirs INCLUDE_DIRECTORIES)
 list(JOIN all_inc_dirs "\r\n" all_inc_dirs)

--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
 # Contribute Code
-You are welcome to contribute to project PaddlePaddle. To contribute to PaddlePaddle, you have to agree with the 
+You are welcome to contribute to project PaddlePaddle. To contribute to PaddlePaddle, you have to agree with the
 [PaddlePaddle Contributor License Agreement](https://gist.github.com/XiaoguangHu01/75018ad8e11af13df97070dd18ae6808).
 We sincerely appreciate your contribution.  This document explains our workflow and work style.

--- a/README.md
+++ b/README.md
@@ -4,152 +4,93 @@
 --------------------------------------------------------------------------------
-# 飞桨框架 ROCm 版安装说明
+English | [简体中文](./README_cn.md) | [日本語](./README_ja.md)
-飞桨框架 ROCm 版支持基于海光 CPU 和海光 DCU 的训练和预测，不仅支持 AMD ROCm，同样支持海光 DCUToolkit（DTK），当前支持的 ROCm 版本为 4.0.1，支持的 DTK 有多个版本。提供两种安装方式：
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html)
+[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
+[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
+[![Twitter](https://img.shields.io/badge/Twitter-1ca0f1.svg?logo=twitter&logoColor=white)](https://twitter.com/PaddlePaddle)
- 通过预编译的 wheel 包安装
+Welcome to the PaddlePaddle GitHub.
- 通过源代码编译安装
-**说明**：基于对应 DTK 版本的飞桨 wheel 包可在[光合开发者社区 ](https://developer.hpccube.com/tool/#sdk) AI 生态包中进行下载
+PaddlePaddle, as the first independent R&D deep learning platform in China, has been officially open-sourced to professional communities since 2016. It is an industrial platform with advanced technologies and rich features that cover core deep learning frameworks, basic model libraries, end-to-end development kits, tools & components as well as service platforms.
+PaddlePaddle originated from industrial practice, with dedication and commitment to industrialization. It has been widely adopted across a wide range of sectors, including manufacturing, agriculture, and enterprise services, while serving more than 5.35 million developers and 200,000 companies and generating 670,000 models. With such advantages, PaddlePaddle has helped an increasing number of partners commercialize AI.
-## 安装方式一：通过 wheel 包安装
-**注意**：当前提供基于 CentOS 7.8 & ROCm 4.0.1 的 docker 镜像，与 Python 3.7 的 wheel 安装包。同时提供基于 CentOS 7.6 & DTK 22.10.1 的 docker 镜像，镜像中包含 Python 3.7 的飞浆 2.3.2 wheel 安装包（ image.sourcefind.cn:5000/dcu/admin/base/paddlepaddle:2.3.2-centos7.6-dtk-22.10.1-py37-latest ）
+## Installation
-**第一步**：准备 CentOS 7.6 & DTK 22.10.1 运行环境 (推荐使用 Paddle 镜像)
+### Latest PaddlePaddle Release: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
-可以直接从 Paddle 的官方镜像库拉取预先装有 CentOS 7.6 & DTK 22.10.1 的 docker 镜像
+Our vision is to enable deep learning for everyone via PaddlePaddle.
+Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
-```bash
+### Install Latest Stable Release:
-# 拉取镜像
-docker pull image.sourcefind.cn:5000/dcu/admin/base/paddlepaddle:2.3.2-centos7.6-dtk-22.10.1-py37-latest
-# 启动容器，注意这里的参数，例如 shm-size, device 等都需要配置
-docker run -it --network=host --name=oneflow_compile --privileged --device=/dev/kfd --device=/dev/dri --ipc=host --shm-size=16G  --group-add video --cap-add=SYS_PTRACE --security-opt seccomp=unconfined -u root --ulimit stack=-1:-1 --ulimit memlock=-1:-1 -v /public/home/xxx:/home image.sourcefind.cn:5000/dcu/admin/base/paddlepaddle:2.3.2-centos7.6-dtk-22.10.1-py37-latest /bin/bash
-# 检查容器是否可以正确识别海光 DCU 设备
-rocm-smi
-# 预期得到以下结果：
-======================= ROCm System Management Interface =======================
-================================= Concise Info =================================
-GPU  Temp   AvgPwr  SCLK     MCLK    Fan   Perf  PwrCap  VRAM%  GPU%
-0    50.0c  23.0W   1319Mhz  800Mhz  0.0%  auto  300.0W    0%   0%
-1    48.0c  25.0W   1319Mhz  800Mhz  0.0%  auto  300.0W    0%   0%
-2    48.0c  24.0W   1319Mhz  800Mhz  0.0%  auto  300.0W    0%   0%
-3    49.0c  27.0W   1319Mhz  800Mhz  0.0%  auto  300.0W    0%   0%
-================================================================================
-============================= End of ROCm SMI Log ==============================
 ```
+# CPU
+pip install paddlepaddle
+# GPU
+pip install paddlepaddle-gpu
-**第二步**：此镜像中已经集成 Python 3.7 的飞浆 2.3.2 版本，如果重新安装需要
-```bash
-pip3 uninstall paddlepaddle-rocm
-pip3 install paddlepaddle-2.3.2_dtk2210_git0195561-cp37-cp37m-manylinux2014_x86_64.whl
 ```
+For more information about installation, please view [Quick Install](https://www.paddlepaddle.org.cn/install/quick)
-**第三步**：验证安装包
+Now our developers can acquire Tesla V100 online computing resources for free. If you create a program by AI Studio, you will obtain 8 hours to train models online per day. [Click here to start](https://aistudio.baidu.com/aistudio/index).
-安装完成之后，运行如下命令。如果出现 PaddlePaddle is installed successfully!，说明已经安装成功
+## FOUR LEADING TECHNOLOGIES
-```bash
+- **Agile Framework for Industrial Development of Deep Neural Networks**
-python -c "import paddle; paddle.utils.run_check()"
-```
-## 安装方式二：通过源码编译安装
+    The PaddlePaddle deep learning framework facilitates the development while lowering the technical burden, through leveraging a programmable scheme to architect the neural networks. It supports both declarative programming and imperative programming with both development flexibility and high runtime performance preserved.  The neural architectures could be automatically designed by algorithms with better performance than the ones designed by human experts.
-**注意**：可使用 Paddle 支持的 CentOS 7.8 & ROCm 4.0.1 编译镜像，且根据 ROCm 4.0.1 的需求，支持的编译器为 devtoolset-7
-**第一步**：准备 ROCm 4.0.1 编译环境 (推荐使用 Paddle 镜像)
+-  **Support Ultra-Large-Scale Training of Deep Neural Networks**
-可以直接从 Paddle 的官方镜像库拉取预先装有 ROCm 4.0.1 的 docker 镜像，在[开发者社区](https://developer.hpccube.com/tool/#sdk) DCU Toolkit 中下载 DTK-22.10.1 解压至 /opt/ 路径下，更换/opt下的原有的  ROCm 4.0.1 文件夹。
+    PaddlePaddle has made breakthroughs in ultra-large-scale deep neural networks training. It launched the world's first large-scale open-source training platform that supports the training of deep networks with 100 billion features and trillions of parameters using data sources distributed over hundreds of nodes. PaddlePaddle overcomes the online deep learning challenges for ultra-large-scale deep learning models, and further achieved real-time model updating with more than 1 trillion parameters.
+     [Click here to learn more](https://github.com/PaddlePaddle/Fleet)
-```bash
-# 拉取镜像
-docker pull paddlepaddle/paddle:latest-dev-rocm4.0-miopen2.11
-# 启动容器，注意这里的参数，例如 shm-size, device 等都需要配置
+- **High-Performance Inference Engines for Comprehensive Deployment Environments**
-docker run -it --name paddle-rocm-dev --shm-size=128G \
-     --device=/dev/kfd --device=/dev/dri --group-add video \
-     --cap-add=SYS_PTRACE --security-opt seccomp=unconfined \
-     paddlepaddle/paddle:latest-dev-rocm4.0-miopen2.11 /bin/bash
-# 替换DTK
+   PaddlePaddle is not only compatible with models trained in 3rd party open-source frameworks, but also offers complete inference products for various production scenarios. Our inference product line includes [Paddle Inference](https://paddle-inference.readthedocs.io/en/master/guides/introduction/index_intro.html): Native inference library for high-performance server and cloud inference; [Paddle Serving](https://github.com/PaddlePaddle/Serving): A service-oriented framework suitable for distributed and pipeline productions; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite): Ultra-Lightweight inference engine for mobile and IoT environments; [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs): A frontend inference engine for browser and mini-apps. Furthermore, through extensive optimization for the leading hardware in each scenario, Paddle inference engines outperform most of the other mainstream frameworks.
-# 检查容器是否可以正确识别海光 DCU 设备
-rocm-smi
-# 预期得到以下结果：
+- **Industry-Oriented Models and Libraries with Open Source Repositories**
-======================= ROCm System Management Interface =======================
-================================= Concise Info =================================
-GPU  Temp   AvgPwr  SCLK     MCLK    Fan   Perf  PwrCap  VRAM%  GPU%
-0    50.0c  23.0W   1319Mhz  800Mhz  0.0%  auto  300.0W    0%   0%
-1    48.0c  25.0W   1319Mhz  800Mhz  0.0%  auto  300.0W    0%   0%
-2    48.0c  24.0W   1319Mhz  800Mhz  0.0%  auto  300.0W    0%   0%
-3    49.0c  27.0W   1319Mhz  800Mhz  0.0%  auto  300.0W    0%   0%
-================================================================================
-============================= End of ROCm SMI Log ==============================
-```
-请在编译之前，检查如下的环境变量是否正确，如果没有则需要安装相应的依赖库，并导出相应的环境变量。以 Paddle 官方的镜像举例，环境变量如下：
+     PaddlePaddle includes and maintains more than 100 mainstream models that have been practiced and polished for a long time in the industry. Some of these models have won major prizes from key international competitions. Meanwhile, PaddlePaddle also provides more than 200 pre-trained models (some of them with source code) to facilitate the rapid development of industrial applications.
+     [Click here to learn more](https://github.com/PaddlePaddle/models)
-```bash
-# PATH 与 LD_LIBRARY_PATH 中存在 devtoolset-7，如果没有运行以下命令
-source /opt/rh/devtoolset-7/enable
-# PATH 中存在 cmake 3.16.0
+## Documentation
-export PATH=/opt/cmake-3.16/bin:${PATH}
-# PATH 与 LD_LIBRARY_PATH 中存在 rocm 4.0.1
+We provide [English](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html) and
-export PATH=/opt/rocm/opencl/bin:/opt/rocm/bin:${PATH}
+[Chinese](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html) documentation.
-export LD_LIBRARY_PATH=/opt/rocm/lib:${LD_LIBRARY_PATH}
-# PATH 中存在 Python 3.7
+- [Guides](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html)
-# 注意：镜像中的 python 3.7 通过 miniconda 安装，请通过 conda activate base 命令加载 Python 3.7 环境
-export PATH=/opt/conda/bin:${PATH}
-```
-**第二步**：下载 Paddle 源码并编译，CMAKE 编译选项含义请参见[编译选项表](https://www.paddlepaddle.org.cn/documentation/docs/zh/develop/install/Tables.html#Compile)，如果指定 Paddle 版本，需要在编译前指定环境变量 PADDLE_VERSION
+  You might want to start from how to implement deep learning basics with PaddlePaddle.
-```bash
+- [Practice](https://www.paddlepaddle.org.cn/documentation/docs/zh/tutorial/index_cn.html)
-# 下载源码，默认 develop 分支
-git clone -b 2.3.2-dtk-22.10.1 http://developer.hpccube.com/codes/aicomponent/paddle.git
-cd Paddle
-# 创建编译目录
+  By now you should be familiar with Fluid. The next step is building a more efficient model or writing your own Operator.
-mkdir build && cd build
-# 指定 Paddle 版本
+- [API Reference](https://www.paddlepaddle.org.cn/documentation/docs/en/api/index_en.html)
-export PADDLE_VERSION=2.3.2
-# 执行 cmake
+   Our new API enables much shorter programs.
-export ROCM_PATH=/opt/rocm
-cmake .. -DPY_VERSION=3.7 -DWITH_GPU=OFF -DWITH_ROCM=ON -DWITH_RCCL=ON -DWITH_NCCL=OFF -DWITH_TESTING=ON -DWITH_DISTRIBUTE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_EXPORT_COMPILE_COMMANDS=ON -DCMAKE_VERBOSE_MAKEFILE=OFF -DWITH_TP_CACHE=ON -DROCM_PATH=${ROCM_PATH} -DWITH_MKLDNN=OFF
+- [How to Contribute](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/08_contribution/index_en.html)
-# 使用以下命令来编译
+   We appreciate your contributions!
-make -j$(nproc)
-```
-**第三步**：安装与验证编译生成的 wheel 包
-编译完成之后进入`Paddle/build/python/dist`目录即可找到编译生成的.whl 安装包，安装与验证命令如下：
+## Communication
-```bash
+- [Github Issues](https://github.com/PaddlePaddle/Paddle/issues): bug reports, feature requests, install issues, usage issues, etc.
-# 安装命令
+- QQ discussion group: 441226485 (PaddlePaddle).
-python -m pip install -U paddlepaddle_rocm-2.3.2-cp37-cp37m-linux_x86_64.whl
+- [Forums](https://aistudio.baidu.com/paddle/forum): discuss implementations, research, etc.
-# 验证命令
-python -c "import paddle; paddle.utils.run_check()"
-```
-## 如何卸载
+## Courses
-请使用以下命令卸载 Paddle：
+- [Server Deployments](https://aistudio.baidu.com/aistudio/course/introduce/19084): Courses introducing high performance server deployments via local and remote services.
+- [Edge Deployments](https://aistudio.baidu.com/aistudio/course/introduce/22690): Courses introducing edge deployments from mobile, IoT to web and applets.
-```
-pip3 uninstall paddlepaddle-rocm
-```
+## Copyright and License
+PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
--- a/README_ORIGIN.md
+++ b/README_ORIGIN.md
-<p align="center">
-<img align="center" src="doc/imgs/logo.png", width=1600>
-<p>
--------------------------------------------------------------------------------
-English | [简体中文](./README_cn.md)
-[![Build Status](https://travis-ci.org/PaddlePaddle/Paddle.svg?branch=develop)](https://travis-ci.org/PaddlePaddle/Paddle)
-[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html)
-[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html)
-[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
-[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
-Welcome to the PaddlePaddle GitHub.
-PaddlePaddle, as the first independent R&D deep learning platform in China, has been officially open-sourced to professional communities since 2016. It is an industrial platform with advanced technologies and rich features that cover core deep learning frameworks, basic model libraries, end-to-end development kits, tools & components as well as service platforms.
-PaddlePaddle is originated from industrial practices with dedication and commitments to industrialization. It has been widely adopted by a wide range of sectors including manufacturing, agriculture, enterprise service, and so on while serving more than 4.7 million developers, 180,000 companies and generating 560,000 models. With such advantages, PaddlePaddle has helped an increasing number of partners commercialize AI.
-## Installation
-### Latest PaddlePaddle Release: [v2.3](https://github.com/PaddlePaddle/Paddle/tree/release/2.3)
-Our vision is to enable deep learning for everyone via PaddlePaddle.
-Please refer to our [release announcement](https://github.com/PaddlePaddle/Paddle/releases) to track the latest features of PaddlePaddle.
-### Install Latest Stable Release:
-```
-# CPU
-pip install paddlepaddle
-# GPU
-pip install paddlepaddle-gpu
-```
-For more information about installation, please view [Quick Install](https://www.paddlepaddle.org.cn/install/quick)
-Now our developers can acquire Tesla V100 online computing resources for free. If you create a program by AI Studio, you will obtain 8 hours to train models online per day. [Click here to start](https://aistudio.baidu.com/aistudio/index).
-## FOUR LEADING TECHNOLOGIES
- **Agile Framework for Industrial Development of Deep Neural Networks**
-    The PaddlePaddle deep learning framework facilitates the development while lowering the technical burden, through leveraging a programmable scheme to architect the neural networks. It supports both declarative programming and imperative programming with both development flexibility and high runtime performance preserved.  The neural architectures could be automatically designed by algorithms with better performance than the ones designed by human experts.
-  **Support Ultra-Large-Scale Training of Deep Neural Networks**
-    PaddlePaddle has made breakthroughs in ultra-large-scale deep neural networks training. It launched the world's first large-scale open-source training platform that supports the training of deep networks with 100 billion features and trillions of parameters using data sources distributed over hundreds of nodes. PaddlePaddle overcomes the online deep learning challenges for ultra-large-scale deep learning models, and further achieved real-time model updating with more than 1 trillion parameters.
-     [Click here to learn more](https://github.com/PaddlePaddle/Fleet)
- **High-Performance Inference Engines for Comprehensive Deployment Environments**
-   PaddlePaddle is not only compatible with models trained in 3rd party open-source frameworks , but also offers complete inference products for various production scenarios. Our inference product line includes [Paddle Inference](https://paddle-inference.readthedocs.io/en/master/guides/introduction/index_intro.html): Native inference library for high-performance server and cloud inference; [Paddle Serving](https://github.com/PaddlePaddle/Serving): A service-oriented framework suitable for distributed and pipeline productions; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite): Ultra-Lightweight inference engine for mobile and IoT environments; [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs): A frontend inference engine for browser and mini-apps. Furthermore, by great amounts of optimization with leading hardware in each scenario, Paddle inference engines outperform most of the other mainstream frameworks.
- **Industry-Oriented Models and Libraries with Open Source Repositories**
-     PaddlePaddle includes and maintains more than 100 mainstream models that have been practiced and polished for a long time in the industry. Some of these models have won major prizes from key international competitions. In the meanwhile, PaddlePaddle has further more than 200 pre-training models (some of them with source codes) to facilitate the rapid development of industrial applications.
-     [Click here to learn more](https://github.com/PaddlePaddle/models)
-## Documentation
-We provide [English](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html) and
-[Chinese](https://www.paddlepaddle.org.cn/documentation/docs/zh/guide/index_cn.html) documentation.
- [Guides](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html)
-  You might want to start from how to implement deep learning basics with PaddlePaddle.
- [Practice](https://www.paddlepaddle.org.cn/documentation/docs/zh/tutorial/index_cn.html)
-  So far you have already been familiar with Fluid. And the next step should be building a more efficient model or inventing your original Operator. 
- [API Reference](https://www.paddlepaddle.org.cn/documentation/docs/en/api/index_en.html)
-   Our new API enables much shorter programs.
- [How to Contribute](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/08_contribution/index_en.html)
-   We appreciate your contributions!
-## Communication
- [Github Issues](https://github.com/PaddlePaddle/Paddle/issues): bug reports, feature requests, install issues, usage issues, etc.
- QQ discussion group: 441226485 (PaddlePaddle).
- [Forums](https://aistudio.baidu.com/paddle/forum): discuss implementations, research, etc.
-## Courses
- [Server Deployments](https://aistudio.baidu.com/aistudio/course/introduce/19084): Courses introducing high performance server deployments via local and remote services.
- [Edge Deployments](https://aistudio.baidu.com/aistudio/course/introduce/22690): Courses introducing edge deployments from mobile, IoT to web and applets.
-## Copyright and License
-PaddlePaddle is provided under the [Apache-2.0 license](LICENSE).
--- a/README_cn.md
+++ b/README_cn.md
+<p align="center">
+<img align="center" src="doc/imgs/logo.png" width="1600">
+</p>
+--------------------------------------------------------------------------------
+[English](./README.md) | 简体中文 | [日本語](./README_ja.md)
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html)
+[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
+[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
+欢迎来到 PaddlePaddle GitHub
+飞桨(PaddlePaddle)以百度多年的深度学习技术研究和业务应用为基础，是中国首个自主研发、功能完备、 开源开放的产业级深度学习平台，集深度学习核心训练和推理框架、基础模型库、端到端开发套件和丰富的工具组件于一体。目前，飞桨累计开发者535万，服务企业20万家，基于飞桨开源深度学习平台产生了67万个模型。飞桨助力开发者快速实现AI想法，快速上线AI业务。帮助越来越多的行业完成AI赋能，实现产业智能化升级。
+## 安装
+### PaddlePaddle最新版本: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
+跟进PaddlePaddle最新特性请参考我们的[版本说明](https://github.com/PaddlePaddle/Paddle/releases)
+### 安装最新稳定版本:
+```
+# CPU
+pip install paddlepaddle
+# GPU
+pip install paddlepaddle-gpu
+```
+更多安装信息详见官网 [安装说明](https://www.paddlepaddle.org.cn/install/quick)
+PaddlePaddle用户可领取**免费Tesla V100在线算力资源**，训练模型更高效。**每日登陆即送8小时**，[前往使用免费算力](https://aistudio.baidu.com/aistudio/index)。
+## 四大领先技术
+- **开发便捷的产业级深度学习框架**
+    飞桨深度学习框架采用基于编程逻辑的组网范式，对于普通开发者而言更容易上手，符合他们的开发习惯。同时支持声明式和命令式编程，兼具开发的灵活性和高性能。网络结构自动设计，模型效果超越人类专家。
+- **支持超大规模深度学习模型的训练**
+    飞桨突破了超大规模深度学习模型训练技术，实现了支持千亿特征、万亿参数、数百节点的开源大规模训练平台，攻克了超大规模深度学习模型的在线学习难题，实现了万亿规模参数模型的实时更新。
+    [查看详情](https://github.com/PaddlePaddle/Fleet)
+- **支持多端多平台的高性能推理部署工具**
+    飞桨不仅广泛兼容第三方开源框架训练的模型部署，并且为不同的场景的生产环境提供了完备的推理引擎，包括适用于高性能服务器及云端推理的原生推理库 [Paddle Inference](https://www.paddlepaddle.org.cn/inference/product_introduction/inference_intro.html)，面向分布式、流水线生产环境下自动上云、A/B测试等高阶功能的服务化推理框架 [Paddle Serving](https://github.com/PaddlePaddle/Serving)，针对于移动端、物联网场景的轻量化推理引擎 [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite)，以及在浏览器、小程序等环境下使用的前端推理引擎 [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs)。同时，透过与不同场景下的主流硬件高度适配优化及异构计算的支持, 飞桨的推理性能也领先绝大部分的主流实现。
+- **面向产业应用，开源开放覆盖多领域的工业级模型库。**
+    飞桨官方支持100多个经过产业实践长期打磨的主流模型，其中包括在国际竞赛中夺得冠军的模型；同时开源开放200多个预训练模型，助力快速的产业应用。
+    [查看详情](https://github.com/PaddlePaddle/models)
+## 文档
+我们提供 [英文](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html) 和
+[中文](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html) 文档
+- [使用指南](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html)：或许您想从深度学习基础开始学习飞桨
+- [应用实践](https://www.paddlepaddle.org.cn/documentation/docs/zh/tutorial/index_cn.html)：使用飞桨搭建您的模型，更高效的完成深度学习任务
+- [API 文档](https://www.paddlepaddle.org.cn/documentation/docs/zh/api/index_cn.html)：新的 API 支持代码更少更简洁的程序
+- [贡献方式](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/08_contribution/index_cn.html)：欢迎您的贡献!
+## 交流与反馈
+- 欢迎您通过[Github Issues](https://github.com/PaddlePaddle/Paddle/issues)来提交问题、报告与建议
+- QQ群: 441226485 (PaddlePaddle)
+- [论坛](https://aistudio.baidu.com/paddle/forum): 欢迎大家在PaddlePaddle论坛分享在使用PaddlePaddle中遇到的问题和经验，营造良好的论坛氛围
+## 课程
+- [服务器部署](https://aistudio.baidu.com/aistudio/course/introduce/19084): 详细介绍高性能服务器端部署实操，包含本地端及服务化Serving部署等
+- [端侧部署](https://aistudio.baidu.com/aistudio/course/introduce/22690): 详细介绍端侧多场景部署实操，从移动端设备、IoT、网页到小程序部署
+## 版权和许可证
+PaddlePaddle由[Apache-2.0 license](LICENSE)提供
--- a/README_ja.md
+++ b/README_ja.md
+<p align="center">
+<img align="center" src="doc/imgs/logo.png" width="1600">
+</p>
+--------------------------------------------------------------------------------
+[English](./README.md) | [简体中文](./README_cn.md) | 日本語
+[![Documentation Status](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html)
+[![Documentation Status](https://img.shields.io/badge/中文文档-最新-brightgreen.svg)](https://paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html)
+[![Release](https://img.shields.io/github/release/PaddlePaddle/Paddle.svg)](https://github.com/PaddlePaddle/Paddle/releases)
+[![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
+[![Twitter](https://img.shields.io/badge/Twitter-1ca0f1.svg?logo=twitter&logoColor=white)](https://twitter.com/PaddlePaddle_)
+PaddlePaddle GitHub へようこそ。
+PaddlePaddle は中国初の独立系 R&D ディープラーニングプラットフォームとして、2016年からプロのコミュニティに正式にオープンソース化されました。コアとなる深層学習フレームワーク、基本モデルライブラリ、エンドツーエンドの開発キット、ツール＆コンポーネント、さらにサービスプラットフォームを網羅する、高度な技術と豊富な機能を備えた産業プラットフォームです。
+PaddlePaddle は、工業化に対するコミットメントを持つ工業的実践から生まれたものです。製造業、農業、企業サービスなど幅広い分野で採用され、535万人以上の開発者、20万以上の企業、67万以上のモデルを生み出しています。それにより PaddlePaddle は、ますます多くのパートナーの AI 商用化を支援しています。
+## インストール
+### PaddlePaddle の最新リリース: [v2.5](https://github.com/PaddlePaddle/Paddle/tree/release/2.5)
+私たちのビジョンは、PaddlePaddle を通じて、誰もが深層学習を行えるようにすることです。
+PaddlePaddle の最新機能を追跡するために、私たちの[リリースのお知らせ](https://github.com/PaddlePaddle/Paddle/releases)を参照してください。
+### 最新の安定版リリースのインストール:
+```
+# CPU
+pip install paddlepaddle
+# GPU
+pip install paddlepaddle-gpu
+```
+インストール方法については、[クイックインストール](https://www.paddlepaddle.org.cn/install/quick)をご覧ください
+この度、開発者の皆様が Tesla V100 のオンライン計算資源を無償で取得できるようになりました。AI Studio でプログラムを作成した場合、1日あたり8時間のオンライン学習が可能です。[スタートはこちら](https://aistudio.baidu.com/aistudio/index)。
+## 四大技術
+- **ディープニューラルネットワークの産業用開発のためのアジャイルフレームワーク**
+    PaddlePaddle ディープラーニングフレームワークは、ニューラルネットワークをアーキテクトするプログラマブルスキームを活用することで、技術的負担を軽減しながら開発を容易にします。宣言型プログラミングと命令型プログラミングの両方をサポートし、開発の柔軟性と高い実行性能を両立しています。ニューラル・アーキテクチャは、アルゴリズムによって自動的に設計され、人間の専門家が設計したものよりも優れた性能を発揮する可能性があります。
+-  **ディープニューラルネットワークの超大規模学習をサポート**
+    PaddlePaddle は、超大規模なディープニューラルネットワークのトレーニングでブレークスルーを起こしました。数百のノードに分散したデータソースを用いて、1000億の特徴量と数兆のパラメータを持つディープネットワークのトレーニングをサポートする、世界初の大規模オープンソース・トレーニング・プラットフォームを立ち上げたのです。PaddlePaddle は、超大規模ディープラーニングモデルのオンラインディープラーニングの課題を克服し、さらに1兆以上のパラメータでリアルタイムにモデル更新を実現しました。
+     [詳しくはこちら](https://github.com/PaddlePaddle/Fleet)
+- **総合的な展開環境に対応した高性能推論エンジン**
+   PaddlePaddle は、サードパーティのオープンソースフレームワークで学習されたモデルとの互換性があるだけでなく、様々な生産シナリオに対応した完全な推論エンジン、システム、スイートを提供しています。当社の推論エンジン、システム、スイートには、次のものがあります。[Paddle Inference](https://paddle-inference.readthedocs.io/en/master/guides/introduction/index_intro.html): 高性能なサーバーおよびクラウド推論用のネイティブ推論ライブラリ; [Paddle Serving](https://github.com/PaddlePaddle/Serving): 分散型やパイプライン型プロダクションに適したサービス指向フレームワーク; [Paddle Lite](https://github.com/PaddlePaddle/Paddle-Lite): モバイルや IoT 環境向けの超軽量推論エンジン; [Paddle.js](https://www.paddlepaddle.org.cn/paddle/paddlejs): ブラウザやミニアプリのためのフロントエンド推論エンジン。さらに、各シナリオの主要なハードウェアに最適化することで、Paddle の推論エンジンは他の主流フレームワークのほとんどを凌駕しています。
+- **オープンソースリポジトリによる業界指向のモデルやライブラリ**
+     PaddlePaddle は、業界で長い間実践され、磨かれてきた100以上の主流モデルを含み、維持しています。これらのモデルの中には、主要な国際コンペティションで主要な賞を受賞したものもあります。一方、PaddlePaddle は、産業用アプリケーションの迅速な開発を促進するために、200以上のプレトレーニングモデル（そのうちのいくつかはソースコード付き）をさらに整備しています。
+     [詳しくはこちら](https://github.com/PaddlePaddle/models)
+## ドキュメント
+[英語](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html)と
+[中国語](https://www.paddlepaddle.org.cn/documentation/docs/zh/guides/index_cn.html)のドキュメントを提供しています。
+- [ガイド](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/index_en.html)
+  PaddlePaddle でディープラーニングの基本を実装する方法から始めてみてはいかがでしょうか。
+- [プラクティス](https://www.paddlepaddle.org.cn/documentation/docs/zh/tutorial/index_cn.html)
+  Paddle を使ってモデルを構築し、ディープラーニングタスクをより効率的に実行しましょう。
+- [API リファレンス](https://www.paddlepaddle.org.cn/documentation/docs/en/api/index_en.html)
+   新しい API により、より短く簡潔なプログラムが書けるようになりました。
+- [コントリビュート方法](https://www.paddlepaddle.org.cn/documentation/docs/en/guides/08_contribution/index_en.html)
+   皆様のご投稿に感謝いたします！
+## コミュニケーション
+- [Github Issues](https://github.com/PaddlePaddle/Paddle/issues): バグレポート、機能リクエスト、インストールに関する問題、使用方法に関する問題など。
+- QQディスカッショングループ: 441226485 (PaddlePaddle)
+- [フォーラム](https://aistudio.baidu.com/paddle/forum): 実装や研究などについて話し合います。
+## コース
+- [Server Deployments](https://aistudio.baidu.com/aistudio/course/introduce/19084): ローカルサービスやリモートサービスを利用した高性能なサーバー展開を紹介するコースです。
+- [Edge Deployments](https://aistudio.baidu.com/aistudio/course/introduce/22690): モバイル、IoT から Web、アプレットまで、エッジの展開を紹介するコースです。
+## Copyright とライセンス
+PaddlePaddle は [Apache-2.0 license](LICENSE) の下で提供されています。
--- a/cmake/PaddleConfig.cmake.in
+++ b/cmake/PaddleConfig.cmake.in
+# Paddle CMake configuration file
+# -------
+#
+# Finds the Paddle library
+#
+# This will define the following variables:
+#
+#   PADDLE_FOUND        -- True if the system has the Paddle library
+#   PADDLE_INCLUDE_DIRS -- The include directories for Paddle
+#   PADDLE_LIBRARIES    -- Libraries to link against
+#
+# @PY_VERSION@, @WITH_GPU@ and @CUDA_VERSION@ are placeholders that are
+# substituted when this template is configured.
+# The install prefix is the parent of the directory that contains this file.
+get_filename_component(PADDLE_INSTALL_PREFIX "${CMAKE_CURRENT_LIST_FILE}/../.." ABSOLUTE)
+# include directories
+set(PADDLE_INCLUDE_DIRS
+    "${PADDLE_INSTALL_PREFIX}/include"
+    "${PADDLE_INSTALL_PREFIX}/include/third_party"
+)
+# Library dependencies.
+set(PADDLE_LIBRARIES_DIRS "${PADDLE_INSTALL_PREFIX}/lib")
+link_directories(${PADDLE_LIBRARIES_DIRS})
+# Every file in <prefix>/lib whose name starts with "lib" is treated as a
+# library to link against.
+file(GLOB PADDLE_LIBRARIES "${PADDLE_LIBRARIES_DIRS}/lib*")
+# Python headers and libraries are appended to the Paddle variables.
+find_package(PythonLibs @PY_VERSION@ REQUIRED)
+list(APPEND PADDLE_INCLUDE_DIRS ${PYTHON_INCLUDE_DIRS})
+list(APPEND PADDLE_LIBRARIES ${PYTHON_LIBRARIES})
+# CUDA libraries are appended only when the package was built with GPU support.
+if(@WITH_GPU@)
+    find_package(CUDA @CUDA_VERSION@ REQUIRED)
+    list(APPEND PADDLE_LIBRARIES ${CUDA_LIBRARIES})
+endif()
+# find_package() only sets <PackageName>_FOUND using the exact package-name
+# casing, so the upper-case variable promised in the header is set explicitly.
+# The REQUIRED lookups above abort configuration before this line on failure.
+set(PADDLE_FOUND TRUE)
--- a/cmake/cblas.cmake
+++ b/cmake/cblas.cmake
@@ -40,7 +40,6 @@ if(WITH_MKLML)
  add_definitions(-DLAPACK_FOUND)
  add_dependencies(cblas mklml)
-  target_link_libraries(cblas dynload_mklml)
  message(STATUS "Found cblas and lapack in MKLML "
                 "(include: ${CBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})")
@@ -96,6 +95,7 @@ if(NOT DEFINED CBLAS_PROVIDER)
        STATUS
          "Found OpenBLAS (include: ${OPENBLAS_INC_DIR}, library: ${CBLAS_LIBRARIES})"
      )
      message(
        STATUS "Found lapack in OpenBLAS (include: ${OPENBLAS_LAPACKE_INC_DIR})"
      )

--- a/cmake/cinn.cmake
+++ b/cmake/cinn.cmake
+# Locations inside the build tree used by CINN for third-party artifacts and
+# downloaded models.
+set(CINN_THIRD_PARTY_PATH "${CMAKE_BINARY_DIR}/third_party")
+set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
+set(DOWNLOAD_MODEL_DIR "${CINN_THIRD_PARTY_PATH}/model")
+# Keep CMAKE_CXX_FLAGS and CMAKE_CXX_STANDARD in agreement:
+#   1. an explicit "-std=c++<x>" in CMAKE_CXX_FLAGS wins and defines the standard;
+#   2. otherwise an already-set CMAKE_CXX_STANDARD is appended to the flags;
+#   3. otherwise both default to C++17.
+# Only the "c++" spelling is matched by the regex below.
+string(REGEX MATCH "-std=(c\\+\\+[^ ]+)" STD_FLAG "${CMAKE_CXX_FLAGS}")
+if(NOT STD_FLAG)
+  if(NOT CMAKE_CXX_STANDARD)
+    message(
+      STATUS
+        "STD_FLAG and CMAKE_CXX_STANDARD not found, using default flag: -std=c++17"
+    )
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++17")
+    set(CMAKE_CXX_STANDARD 17)
+  else()
+    message(
+      STATUS
+        "Got CMAKE_CXX_STANDARD=${CMAKE_CXX_STANDARD}, append -std=c++${CMAKE_CXX_STANDARD} to CMAKE_CXX_FLAGS"
+    )
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++${CMAKE_CXX_STANDARD}")
+  endif()
+else()
+  # Take the first run of digits from the matched flag (e.g. 17 from -std=c++17).
+  string(REGEX MATCH "([0-9]+)" STD_VALUE "${STD_FLAG}")
+  message(
+    STATUS "Got STD_FLAG=${STD_FLAG}, set CMAKE_CXX_STANDARD=${STD_VALUE}")
+  set(CMAKE_CXX_STANDARD ${STD_VALUE})
+endif()
+# When the caller did not export runtime_include_dir, point it at the CUDA
+# runtime sources in the source tree and bake the same path into the
+# RUNTIME_INCLUDE_DIR compile definition. Note that the definition is only added
+# in this branch, i.e. not when the environment variable was already set.
+if(NOT DEFINED ENV{runtime_include_dir})
+  message(
+    STATUS
+      "set runtime_include_dir: ${CMAKE_SOURCE_DIR}/paddle/cinn/runtime/cuda")
+  set(ENV{runtime_include_dir} "${CMAKE_SOURCE_DIR}/paddle/cinn/runtime/cuda")
+  add_definitions(
+    -DRUNTIME_INCLUDE_DIR="${CMAKE_SOURCE_DIR}/paddle/cinn/runtime/cuda")
+endif()
+# Feature macros that mirror the corresponding build options.
+if(WITH_TESTING)
+  add_definitions(-DCINN_WITH_TEST)
+endif()
+if(WITH_DEBUG)
+  add_definitions(-DCINN_WITH_DEBUG)
+endif()
+# TODO(zhhsplendid): CINN has lots of warnings during early development.
+# They will be treated as errors under paddle. For now "-w" is passed, which
+# silences all compiler warnings; the code is to be cleaned up in the future.
+add_definitions(-w)
+include(cmake/cinn/version.cmake)
+# Seed the build tree with the default config.cmake once; a copy that already
+# exists in the build tree is kept as-is and is the one that gets included.
+if(NOT EXISTS ${CMAKE_BINARY_DIR}/cmake/cinn/config.cmake)
+  file(COPY ${PROJECT_SOURCE_DIR}/cmake/cinn/config.cmake
+       DESTINATION ${CMAKE_BINARY_DIR}/cmake/cinn)
+endif()
+include(${CMAKE_BINARY_DIR}/cmake/cinn/config.cmake)
+# MKL: wrap the MKLML libraries in a dummy static target (cinn_mklml) that the
+# CINN libraries link against, and enable the MKL CBLAS code path.
+if(WITH_MKL)
+  generate_dummy_static_lib(LIB_NAME "cinn_mklml" GENERATOR "mklml.cmake")
+  target_link_libraries(cinn_mklml ${MKLML_LIB} ${MKLML_IOMP_LIB})
+  add_dependencies(cinn_mklml ${MKLML_PROJECT})
+  add_definitions(-DCINN_WITH_MKL_CBLAS)
+endif()
+if(WITH_MKLDNN)
+  add_definitions(-DCINN_WITH_DNNL)
+endif()
+# CUDA: enable the language, add include paths, pick architecture flags and
+# locate the CUDA libraries that cinnapi / cinncore link against.
+if(WITH_GPU)
+  message(STATUS "Enable CINN CUDA")
+  add_definitions(-DCINN_WITH_CUDA)
+  if(WITH_CUDNN)
+    message(STATUS "Enable CINN CUDNN")
+    add_definitions(-DCINN_WITH_CUDNN)
+  endif()
+  enable_language(CUDA)
+  find_package(CUDA REQUIRED)
+  include_directories(${CUDA_INCLUDE_DIRS})
+  include_directories(${CMAKE_SOURCE_DIR}/paddle/cinn/runtime/cuda)
+  # NOTE(review): hard-coded, distribution-specific path - confirm it is needed.
+  include_directories(/usr/lib/x86_64-linux-gnu)
+  set(CUDA_SEPARABLE_COMPILATION ON)
+  cuda_select_nvcc_arch_flags(ARCH_FLAGS Auto)
+  list(APPEND CUDA_NVCC_FLAGS ${ARCH_FLAGS})
+  # CUDA sources are compiled with the same language standard as C++ sources.
+  set(CMAKE_CUDA_STANDARD ${CMAKE_CXX_STANDARD})
+  message(
+    STATUS
+      "copy paddle/cinn/common/float16.h paddle/cinn/common/bfloat16.h to $ENV{runtime_include_dir}"
+  )
+  # The half-precision headers are copied at configure time into the directory
+  # named by the runtime_include_dir environment variable.
+  file(COPY paddle/cinn/common/float16.h paddle/cinn/common/bfloat16.h
+       DESTINATION $ENV{runtime_include_dir})
+  # libcuda.so is looked up in the toolkit's stubs directory first.
+  find_library(CUDASTUB libcuda.so HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/
+                                         REQUIRED)
+  find_library(CUBLAS libcublas.so HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64
+                                         /usr/lib /usr/lib64 REQUIRED)
+  find_library(CUDNN libcudnn.so HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64 /usr/lib
+                                       /usr/lib64 REQUIRED)
+  find_library(CURAND libcurand.so HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64
+                                         /usr/lib /usr/lib64 REQUIRED)
+  find_library(CUSOLVER libcusolver.so HINTS ${CUDA_TOOLKIT_ROOT_DIR}/lib64
+                                             /usr/lib /usr/lib64 REQUIRED)
+endif()
+# Reset the cache-backed accumulators to empty on every configure run; they are
+# filled again while the CINN subdirectories are processed.
+set(cinnapi_src CACHE INTERNAL "" FORCE)
+set(core_src CACHE INTERNAL "" FORCE)
+set(core_includes CACHE INTERNAL "" FORCE)
+set(core_proto_includes CACHE INTERNAL "" FORCE)
+include_directories(${CMAKE_SOURCE_DIR})
+include_directories(${CMAKE_BINARY_DIR})
+# Helper modules; paths are given relative to the directory that includes this
+# file.
+include(cmake/generic.cmake)
+include(cmake/cinn/system.cmake)
+include(cmake/cinn/core.cmake)
+include(cmake/cinn/nvrtc.cmake)
+include(cmake/cinn/nvtx.cmake)
+# In a CINN-only build gflags is linked into every target declared after this
+# point.
+if(CINN_ONLY)
+  link_libraries(gflags)
+endif()
+# Linker flag string referencing the export map; this file only stores it in
+# the cache and does not apply it to any target itself.
+set(LINK_FLAGS
+    "-Wl,--version-script ${CMAKE_CURRENT_SOURCE_DIR}/cmake/cinn/export.map"
+    CACHE INTERNAL "")
+set(global_test_args
+    "--cinn_x86_builtin_code_root=${CMAKE_SOURCE_DIR}/paddle/cinn/backends")
+# NOTE(review): Python_VIRTUALENV is a hint for CMake's FindPython modules; the
+# PythonInterp/PythonLibs modules used below may not honor it - confirm.
+set(Python_VIRTUALENV FIRST)
+# Python is only searched for when the including project has not located it yet.
+if(NOT PYTHON_EXECUTABLE)
+  find_package(PythonInterp ${PY_VERSION} REQUIRED)
+endif()
+if(NOT PYTHON_LIBRARIES)
+  find_package(PythonLibs ${PY_VERSION} REQUIRED)
+endif()
+message(STATUS "PYTHON_LIBRARIES: ${PYTHON_LIBRARIES}")
+message(STATUS "PYTHON_INCLUDE_DIR: ${PYTHON_INCLUDE_DIR}")
+include_directories(${PYTHON_INCLUDE_DIR})
+set(core_deps CACHE INTERNAL "" FORCE)
+set(hlir_src CACHE INTERNAL "" FORCE)
+# TODO(chenweihang): The logic later depends adding cinn subdirectory here,
+# but better to move to paddle/CMakeLists.txt
+add_subdirectory(paddle/cinn)
+# core_src mirrors cinnapi_src so that gen_cinncore() below builds the cinncore
+# libraries from the same source list as cinnapi.
+set(core_src "${cinnapi_src}")
+# cinnapi: shared library built from all gathered CINN sources.
+cinn_cc_library(
+  cinnapi
+  SHARED
+  SRCS
+  ${cinnapi_src}
+  DEPS
+  glog
+  ${llvm_libs}
+  cinn_framework_proto
+  param_proto
+  auto_schedule_proto
+  schedule_desc_proto
+  absl
+  isl
+  ginac
+  pybind
+  ${jitify_deps})
+# Build-order dependencies only (no linking): the generated LLVM runtime IR
+# header, zlib and whatever was collected in core_deps.
+add_dependencies(cinnapi GEN_LLVM_RUNTIME_IR_HEADER ZLIB::ZLIB)
+add_dependencies(cinnapi GEN_LLVM_RUNTIME_IR_HEADER ${core_deps})
+# phi is linked unless CINN is built standalone.
+if(NOT CINN_ONLY)
+  target_link_libraries(cinnapi phi)
+  add_dependencies(cinnapi phi)
+endif()
+target_link_libraries(cinnapi ${PYTHON_LIBRARIES})
+# Optional math backends; MKLDNN is only considered when MKL is enabled.
+if(WITH_MKL)
+  target_link_libraries(cinnapi cinn_mklml)
+  add_dependencies(cinnapi cinn_mklml)
+  if(WITH_MKLDNN)
+    target_link_libraries(cinnapi ${MKLDNN_LIB})
+    add_dependencies(cinnapi ${MKLDNN_PROJECT})
+  endif()
+endif()
+# CUDA libraries; the CUDASTUB/CUBLAS/CUDNN/CURAND/CUSOLVER variables are
+# filled by the find_library() calls in the WITH_GPU section above.
+if(WITH_GPU)
+  target_link_libraries(
+    cinnapi
+    ${CUDA_NVRTC_LIB}
+    ${CUDA_LIBRARIES}
+    ${CUDASTUB}
+    ${CUBLAS}
+    ${CUDNN}
+    ${CURAND}
+    ${CUSOLVER})
+  if(NVTX_FOUND)
+    target_link_libraries(cinnapi ${CUDA_NVTX_LIB})
+  endif()
+endif()
+# gen_cinncore(<STATIC|SHARED>)
+#
+# Declares the CINN core library from ${core_src} with the given link type.
+# The target is named `cinncore_static` for STATIC and `cinncore` otherwise.
+# Besides the DEPS passed to cinn_cc_library() the target is linked against
+# Python, phi (unless CINN_ONLY), MKL/MKLDNN (when enabled) and the CUDA
+# libraries (when WITH_GPU is set).
+function(gen_cinncore LINKTYPE)
+  set(CINNCORE_TARGET cinncore)
+  # Quoted so that the argument is compared as a string and is not looked up
+  # as a variable name when a variable called STATIC/SHARED happens to exist.
+  if("${LINKTYPE}" STREQUAL "STATIC")
+    set(CINNCORE_TARGET cinncore_static)
+  endif()
+  cinn_cc_library(
+    ${CINNCORE_TARGET}
+    ${LINKTYPE}
+    SRCS
+    ${core_src}
+    DEPS
+    glog
+    ${llvm_libs}
+    cinn_framework_proto
+    param_proto
+    auto_schedule_proto
+    schedule_desc_proto
+    absl
+    isl
+    ginac)
+  # Build-order dependencies only (no linking).
+  add_dependencies(${CINNCORE_TARGET} GEN_LLVM_RUNTIME_IR_HEADER ZLIB::ZLIB)
+  add_dependencies(${CINNCORE_TARGET} GEN_LLVM_RUNTIME_IR_HEADER ${core_deps})
+  if(NOT CINN_ONLY)
+    target_link_libraries(${CINNCORE_TARGET} phi)
+    add_dependencies(${CINNCORE_TARGET} phi)
+  endif()
+  # pybind is an ordering dependency here; it is not linked into cinncore.
+  add_dependencies(${CINNCORE_TARGET} pybind)
+  target_link_libraries(${CINNCORE_TARGET} ${PYTHON_LIBRARIES})
+  if(WITH_MKL)
+    target_link_libraries(${CINNCORE_TARGET} cinn_mklml)
+    add_dependencies(${CINNCORE_TARGET} cinn_mklml)
+    if(WITH_MKLDNN)
+      target_link_libraries(${CINNCORE_TARGET} ${MKLDNN_LIB})
+      add_dependencies(${CINNCORE_TARGET} ${MKLDNN_PROJECT})
+    endif()
+  endif()
+  if(WITH_GPU)
+    target_link_libraries(
+      ${CINNCORE_TARGET}
+      ${CUDA_NVRTC_LIB}
+      ${CUDA_LIBRARIES}
+      ${CUDASTUB}
+      ${CUBLAS}
+      ${CUDNN}
+      ${CURAND}
+      ${CUSOLVER}
+      ${jitify_deps})
+    if(NVTX_FOUND)
+      target_link_libraries(${CINNCORE_TARGET} ${CUDA_NVTX_LIB})
+    endif()
+  endif()
+endfunction()
+gen_cinncore(STATIC)
+gen_cinncore(SHARED)
+# --------distribute cinncore lib and include begin--------
+# Assembles a redistributable tree under ${CMAKE_BINARY_DIR}/dist:
+#   * public headers are copied at configure time,
+#   * generated proto headers, libcinnapi.so and the third-party install tree
+#     are copied after `cinnapi` is built,
+#   * the demo sources and the static libraries are copied after
+#     `cinncore_static` is built.
+# The POST_BUILD commands use the TARGET form of add_custom_command(), which
+# has no DEPENDS option, so none is passed. ${CMAKE_COMMAND} is used so that the
+# copies are done by the same CMake that configured the build.
+set(PUBLISH_LIBS ON)
+if(PUBLISH_LIBS)
+  set(core_includes
+      "${core_includes};paddle/cinn/runtime/cuda/cinn_cuda_runtime_source.cuh")
+  # Headers keep their relative directory layout below dist/cinn/include.
+  foreach(header ${core_includes})
+    get_filename_component(prefix ${header} DIRECTORY)
+    file(COPY ${header}
+         DESTINATION ${CMAKE_BINARY_DIR}/dist/cinn/include/${prefix})
+  endforeach()
+  # Generated proto headers only exist after the build, hence POST_BUILD.
+  foreach(proto_header ${core_proto_includes})
+    string(REPLACE ${CMAKE_BINARY_DIR}/ "" relname ${proto_header})
+    get_filename_component(prefix ${relname} DIRECTORY)
+    set(target_name ${CMAKE_BINARY_DIR}/dist/cinn/include/${relname})
+    add_custom_command(
+      TARGET cinnapi
+      POST_BUILD
+      COMMENT "copy generated proto header '${relname}' to dist"
+      COMMAND ${CMAKE_COMMAND} -E copy ${proto_header} ${target_name})
+  endforeach()
+  add_custom_command(
+    TARGET cinnapi
+    POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:cinnapi>
+            ${CMAKE_BINARY_DIR}/dist/cinn/lib/libcinnapi.so
+    COMMAND ${CMAKE_COMMAND} -E copy_directory ${CINN_THIRD_PARTY_PATH}/install
+            ${CMAKE_BINARY_DIR}/dist/third_party)
+  add_custom_command(
+    TARGET cinncore_static
+    POST_BUILD
+    COMMAND
+      ${CMAKE_COMMAND} -E copy
+      ${PROJECT_SOURCE_DIR}/tools/cinn/tutorials_demo/demo.cc
+      ${CMAKE_BINARY_DIR}/dist/demo.cc
+    COMMAND
+      ${CMAKE_COMMAND} -E copy
+      ${PROJECT_SOURCE_DIR}/tools/cinn/tutorials_demo/build_demo.sh
+      ${CMAKE_BINARY_DIR}/dist/build_demo.sh
+    COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:cinncore_static>
+            ${CMAKE_BINARY_DIR}/dist/cinn/lib/libcinncore_static.a
+    COMMAND
+      ${CMAKE_COMMAND} -E copy
+      ${CMAKE_BINARY_DIR}/paddle/cinn/frontend/paddle/libcinn_framework_proto.a
+      ${CMAKE_BINARY_DIR}/dist/cinn/lib/libcinn_framework_proto.a
+    COMMAND
+      ${CMAKE_COMMAND} -E copy
+      ${CMAKE_BINARY_DIR}/paddle/cinn/hlir/pe/libparam_proto.a
+      ${CMAKE_BINARY_DIR}/dist/cinn/lib/libparam_proto.a
+    COMMAND
+      ${CMAKE_COMMAND} -E copy
+      ${CMAKE_BINARY_DIR}/paddle/cinn/auto_schedule/libauto_schedule_proto.a
+      ${CMAKE_BINARY_DIR}/dist/cinn/lib/libauto_schedule_proto.a
+    COMMAND
+      ${CMAKE_COMMAND} -E copy
+      ${CMAKE_BINARY_DIR}/paddle/cinn/ir/schedule/libschedule_desc_proto.a
+      ${CMAKE_BINARY_DIR}/dist/cinn/lib/libschedule_desc_proto.a
+    COMMENT "distribute libcinncore_static.a and related header files.")
+endif()
+# --------distribute cinncore lib and include end--------
+# Location of the published CINN shared library inside the dist tree.
+set(CINN_LIB_NAME "libcinnapi.so")
+set(CINN_LIB_LOCATION "${CMAKE_BINARY_DIR}/dist/cinn/lib")
+set(CINN_LIB "${CINN_LIB_LOCATION}/${CINN_LIB_NAME}")
+######################################
+# Add CINN's dependencies header files
+######################################
+# Add isl (taken from the third-party install tree)
+set(ISL_INCLUDE_DIR "${CMAKE_BINARY_DIR}/third_party/install/isl/include")
+include_directories(${ISL_INCLUDE_DIR})
+# Add LLVM (taken from the dist tree)
+set(LLVM_INCLUDE_DIR "${CMAKE_BINARY_DIR}/dist/third_party/llvm/include")
+include_directories(${LLVM_INCLUDE_DIR})
+######################################################
+# Add CINN's own published headers (dist/cinn/include)
+######################################################
+set(CINN_INCLUDE_DIR "${CMAKE_BINARY_DIR}/dist/cinn/include")
+include_directories(${CINN_INCLUDE_DIR})
--- a/cmake/cinn/config.cmake
+++ b/cmake/cinn/config.cmake
+# User-editable CINN build settings.
+# The home path of ISL
+# Required!
+set(ISL_HOME "")
+# OpenMP selection; defaults to "intel".
+# NOTE(review): the set of accepted values is defined by the consumers of this
+# variable, which are not part of this file - confirm before changing.
+set(USE_OPENMP "intel")
--- a/cmake/cinn/core.cmake
+++ b/cmake/cinn/core.cmake
+# Global C++ flags for every target declared after this point: position
+# independent code, AVX and FMA instructions, and two warnings disabled.
+set(CMAKE_CXX_FLAGS
+    "${CMAKE_CXX_FLAGS} -fPIC -mavx -mfma -Wno-write-strings -Wno-psabi")
+# Base URL for downloadable inference resources; may be overridden in the cache.
+set(PADDLE_RESOURCE_URL
+    "http://paddle-inference-dist.bj.bcebos.com"
+    CACHE STRING "inference download url")
+# cinn_cc_library(<target> [STATIC|SHARED] [SRCS <src>...] [DEPS <dep>...])
+#
+# With SRCS: builds <target> from the sources (SHARED when SHARED/shared is
+# given, STATIC otherwise); every DEPS entry is both linked and added as a
+# build-order dependency.
+# Without SRCS: merges the static libraries listed in DEPS into <target> via
+# cinn_merge_static_libs(). It is a fatal error to pass neither.
+# All targets except cinn_gtest_main, utils and lib are additionally linked
+# against Threads::Threads.
+function(cinn_cc_library TARGET_NAME)
+  set(options STATIC static SHARED shared)
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(cinn_cc_library "${options}" "${oneValueArgs}"
+                        "${multiValueArgs}" ${ARGN})
+  if(cinn_cc_library_SRCS)
+    if(cinn_cc_library_SHARED OR cinn_cc_library_shared) # build *.so
+      add_library(${TARGET_NAME} SHARED ${cinn_cc_library_SRCS})
+    else()
+      add_library(${TARGET_NAME} STATIC ${cinn_cc_library_SRCS})
+    endif()
+    if(cinn_cc_library_DEPS)
+      target_link_libraries(${TARGET_NAME} ${cinn_cc_library_DEPS})
+      add_dependencies(${TARGET_NAME} ${cinn_cc_library_DEPS})
+    endif()
+  else()
+    if(cinn_cc_library_DEPS)
+      cinn_merge_static_libs(${TARGET_NAME} ${cinn_cc_library_DEPS})
+    else()
+      message(
+        FATAL_ERROR
+          "Please specify source files or libraries in cinn_cc_library(${TARGET_NAME} ...)."
+      )
+    endif()
+  endif()
+  if((NOT ("${TARGET_NAME}" STREQUAL "cinn_gtest_main"))
+     AND (NOT ("${TARGET_NAME}" STREQUAL "utils"))
+     AND (NOT ("${TARGET_NAME}" STREQUAL "lib")))
+    target_link_libraries(${TARGET_NAME} Threads::Threads)
+  endif()
+endfunction()
+# No elements are appended here, so this statement leaves CMAKE_CTEST_ARGUMENTS
+# unchanged; it is kept as-is.
+list(APPEND CMAKE_CTEST_ARGUMENTS)
+# remove_gflags(<target>)
+#
+# Removes the `glog` and `gflags` entries from <target>'s LINK_LIBRARIES
+# property. Targets without any link libraries are left untouched.
+function(remove_gflags TARGET_NAME)
+  get_target_property(TARGET_LIBRARIES ${TARGET_NAME} LINK_LIBRARIES)
+  # get_target_property() stores "<var>-NOTFOUND" when the property is unset;
+  # writing that back would put a bogus library name on the link line.
+  if(NOT TARGET_LIBRARIES)
+    return()
+  endif()
+  list(REMOVE_ITEM TARGET_LIBRARIES glog gflags)
+  set_property(TARGET ${TARGET_NAME} PROPERTY LINK_LIBRARIES
+                                              ${TARGET_LIBRARIES})
+endfunction()
+# cinn_cc_test(<target> [SERIAL] SRCS <src>... [DEPS <dep>...] [ARGS <arg>...])
+#
+# Declares a C++ unit test when WITH_TESTING is enabled; otherwise does
+# nothing. The executable is linked against cinn_gtest_main, gtest, glog, the
+# OS dependency modules and DEPS, registered with CTest (working directory:
+# the current binary directory) and finally stripped of glog/gflags through
+# remove_gflags(). SERIAL marks the test as RUN_SERIAL.
+function(cinn_cc_test TARGET_NAME)
+  if(WITH_TESTING)
+    set(options SERIAL)
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS ARGS)
+    cmake_parse_arguments(cinn_cc_test "${options}" "${oneValueArgs}"
+                          "${multiValueArgs}" ${ARGN})
+    add_executable(${TARGET_NAME} ${cinn_cc_test_SRCS})
+    get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
+    target_link_libraries(${TARGET_NAME} ${os_dependency_modules}
+                          cinn_gtest_main gtest glog ${cinn_cc_test_DEPS})
+    add_dependencies(${TARGET_NAME} cinn_gtest_main gtest glog
+                     ${cinn_cc_test_DEPS})
+    # NOTE(review): ARGS is passed as one quoted argument, so a multi-element
+    # list reaches the test as a single ';'-joined string - confirm intended.
+    add_test(
+      NAME ${TARGET_NAME}
+      COMMAND ${TARGET_NAME} "${cinn_cc_test_ARGS}"
+      WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+    if(cinn_cc_test_SERIAL)
+      set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
+    endif()
+    # Per-test timeout in seconds: 6000 s, i.e. 100 minutes.
+    set_tests_properties(${TARGET_NAME} PROPERTIES TIMEOUT 6000)
+    remove_gflags(${TARGET_NAME})
+  endif()
+endfunction()
+# cinn_nv_library(<target> [STATIC|SHARED] [SRCS <src>...] [DEPS <dep>...])
+#
+# CUDA counterpart of cinn_cc_library(); does nothing unless WITH_GPU is set.
+# With SRCS the target is created by cuda_add_library() (SHARED when requested,
+# STATIC otherwise) and DEPS are added as dependencies and link libraries.
+# Without SRCS the static libraries in DEPS are merged into <target>. It is a
+# fatal error to pass neither. The target is always linked against
+# Threads::Threads.
+function(cinn_nv_library TARGET_NAME)
+  if(WITH_GPU)
+    set(options STATIC static SHARED shared)
+    set(oneValueArgs "")
+    set(multiValueArgs SRCS DEPS)
+    cmake_parse_arguments(cinn_nv_library "${options}" "${oneValueArgs}"
+                          "${multiValueArgs}" ${ARGN})
+    if(cinn_nv_library_SRCS)
+      if(cinn_nv_library_SHARED OR cinn_nv_library_shared) # build *.so
+        cuda_add_library(${TARGET_NAME} SHARED ${cinn_nv_library_SRCS})
+      else()
+        cuda_add_library(${TARGET_NAME} STATIC ${cinn_nv_library_SRCS})
+      endif()
+      if(cinn_nv_library_DEPS)
+        add_dependencies(${TARGET_NAME} ${cinn_nv_library_DEPS})
+        target_link_libraries(${TARGET_NAME} ${cinn_nv_library_DEPS})
+      endif()
+    else()
+      if(cinn_nv_library_DEPS)
+        cinn_merge_static_libs(${TARGET_NAME} ${cinn_nv_library_DEPS})
+      else()
+        # FATAL_ERROR (not "FATAL") is the message mode that stops processing.
+        message(FATAL_ERROR
+                "Please specify source file or library in cinn_nv_library.")
+      endif()
+    endif()
+    target_link_libraries(${TARGET_NAME} Threads::Threads)
+  endif()
+endfunction()
+# cinn_nv_binary(<target> SRCS <src>... [DEPS <dep>...])
+#
+# Builds a CUDA executable through cuda_add_executable() when WITH_GPU is set.
+# When DEPS are given they are linked, added as build-order dependencies, and
+# the optional profiler/allocator libraries are attached via common_link().
+function(cinn_nv_binary TARGET_NAME)
+  if(NOT WITH_GPU)
+    return()
+  endif()
+  cmake_parse_arguments(cinn_nv_binary "" "" "SRCS;DEPS" ${ARGN})
+  cuda_add_executable(${TARGET_NAME} ${cinn_nv_binary_SRCS})
+  if(NOT cinn_nv_binary_DEPS)
+    return()
+  endif()
+  target_link_libraries(${TARGET_NAME} ${cinn_nv_binary_DEPS})
+  add_dependencies(${TARGET_NAME} ${cinn_nv_binary_DEPS})
+  common_link(${TARGET_NAME})
+endfunction()
+# cinn_nv_test(<target> [SERIAL] SRCS <src>... [DEPS <dep>...] [ARGS <arg>...])
+#
+# Declares a GPU unit test; a no-op unless both WITH_GPU and WITH_TESTING are
+# enabled. The test runs from the current binary directory, SERIAL marks it as
+# RUN_SERIAL, and glog/gflags are stripped from its link line at the end.
+function(cinn_nv_test TARGET_NAME)
+  if(NOT (WITH_GPU AND WITH_TESTING))
+    return()
+  endif()
+  cmake_parse_arguments(cinn_nv_test "SERIAL" "" "SRCS;DEPS;ARGS" ${ARGN})
+  # Attention:
+  # 1. FindCUDA's cuda_add_executable is deprecated after cmake v3.10, so the
+  #    plain add_executable is used for CUDA here.
+  # 2. cuda_add_executable does not support ccache.
+  # Reference: https://cmake.org/cmake/help/v3.10/module/FindCUDA.html
+  add_executable(${TARGET_NAME} ${cinn_nv_test_SRCS})
+  get_property(os_dependency_modules GLOBAL PROPERTY OS_DEPENDENCY_MODULES)
+  # First group of link libraries: user deps, gtest and the CUDA math libraries.
+  target_link_libraries(
+    ${TARGET_NAME}
+    ${cinn_nv_test_DEPS}
+    cinn_gtest_main
+    gtest
+    ${os_dependency_modules}
+    ${CUDNN_LIBRARY}
+    ${CUBLAS_LIBRARIES}
+    ${CUDA_LIBRARIES})
+  add_dependencies(${TARGET_NAME} ${cinn_nv_test_DEPS} cinn_gtest_main gtest)
+  common_link(${TARGET_NAME})
+  add_test(
+    NAME ${TARGET_NAME}
+    COMMAND ${TARGET_NAME} "${cinn_nv_test_ARGS}"
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+  if(cinn_nv_test_SERIAL)
+    set_property(TEST ${TARGET_NAME} PROPERTY RUN_SERIAL 1)
+  endif()
+  # Second group: threads, NVRTC, the CUDA runtime and the libcuda stub.
+  target_link_libraries(
+    ${TARGET_NAME} Threads::Threads ${CUDA_NVRTC_LIB} ${CUDA_LIBRARIES}
+    ${CUDA_cudart_static_LIBRARY}
+    ${CUDA_TOOLKIT_ROOT_DIR}/lib64/stubs/libcuda.so)
+  if(NVTX_FOUND)
+    target_link_libraries(${TARGET_NAME} ${CUDA_NVTX_LIB})
+  endif()
+  remove_gflags(${TARGET_NAME})
+endfunction()
+# add_run_test_dependency(<target> <dep>)
+#
+# Makes <target> depend on actually running the test executable <dep>: a helper
+# custom target executes ./<dep> from the current binary directory during the
+# build, and <target> depends on both <dep> and that helper target.
+# Does nothing unless WITH_TESTING is enabled.
+function(add_run_test_dependency TARGET_NAME DEP_NAME)
+  if(NOT WITH_TESTING)
+    return()
+  endif()
+  set(run_target ${TARGET_NAME}_TEST_OUTPUT_DEPENDENCY_ON_${DEP_NAME})
+  add_custom_target(
+    ${run_target}
+    COMMAND
+      cd ${CMAKE_CURRENT_BINARY_DIR} && ./${DEP_NAME}
+      --cinn_x86_builtin_code_root=${CMAKE_SOURCE_DIR}/paddle/cinn/backends
+    COMMAND cd ${CMAKE_BINARY_DIR}
+    DEPENDS ${DEP_NAME})
+  add_dependencies(${TARGET_NAME} ${DEP_NAME} ${run_target})
+endfunction()
+# Collects the third-party modules used by the Paddle static library, so that
+# dependencies can be reduced when building the inference libs.
+# The global property FLUID_THIRD_PARTY starts out empty.
+set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY)
+# find_fluid_thirdparties(<target>)
+#
+# Appends <target> to the global FLUID_THIRD_PARTY list when its absolute path,
+# with the leading ${PADDLE_SOURCE_DIR}/ removed, contains "third_party" at an
+# index greater than 1.
+function(find_fluid_thirdparties TARGET_NAME)
+  get_filename_component(__target_path ${TARGET_NAME} ABSOLUTE)
+  string(REGEX REPLACE "^${PADDLE_SOURCE_DIR}/" "" __target_path
+                       ${__target_path})
+  string(FIND "${__target_path}" "third_party" pos)
+  if(pos GREATER 1)
+    # Read the current list into the same variable that is extended and written
+    # back, so that earlier entries are kept instead of being overwritten.
+    get_property(fluid_third_partys GLOBAL PROPERTY FLUID_THIRD_PARTY)
+    list(APPEND fluid_third_partys ${TARGET_NAME})
+    set_property(GLOBAL PROPERTY FLUID_THIRD_PARTY "${fluid_third_partys}")
+  endif()
+endfunction()
+# cinn_merge_static_libs(<target> <lib>...)
+#
+# Creates the static library <target> that contains the contents of all the
+# given static library targets. A dummy C source file is generated so that
+# add_library() has something to compile; the real merge happens in a
+# POST_BUILD step that is platform specific:
+#   APPLE - /usr/bin/libtool -static
+#   LINUX - objects are extracted with `ar -x` and re-archived with `ar crs`
+#   WIN32 - MSVC lib.exe
+# If none of these platform variables is true, no target is created.
+function(cinn_merge_static_libs TARGET_NAME)
+  set(libs ${ARGN})
+  list(REMOVE_DUPLICATES libs)
+  # Get all propagation dependencies from the merged libraries
+  foreach(lib ${libs})
+    list(APPEND libs_deps ${${lib}_LIB_DEPENDS})
+  endforeach()
+  if(libs_deps)
+    list(REMOVE_DUPLICATES libs_deps)
+  endif()
+  # To produce a library we need at least one source file.
+  # It is created by add_custom_command below and also helps
+  # to track dependencies.
+  set(target_SRCS ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}_dummy.c)
+  if(APPLE) # Use OSX's libtool to merge archives
+    # Make the generated dummy source file depended on all static input
+    # libs. If input lib changes,the source file is touched
+    # which causes the desired effect (relink).
+    add_custom_command(
+      OUTPUT ${target_SRCS}
+      COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
+      DEPENDS ${libs})
+    # Generate dummy static lib
+    file(WRITE ${target_SRCS}
+         "const char *dummy_${TARGET_NAME} = \"${target_SRCS}\";")
+    add_library(${TARGET_NAME} STATIC ${target_SRCS})
+    target_link_libraries(${TARGET_NAME} ${libs_deps})
+    foreach(lib ${libs})
+      # Get the file names of the libraries to be merged
+      set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
+    endforeach()
+    # Replace the freshly built archive with the merged one.
+    add_custom_command(
+      TARGET ${TARGET_NAME}
+      POST_BUILD
+      COMMAND rm "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a"
+      COMMAND /usr/bin/libtool -static -o
+              "${CMAKE_CURRENT_BINARY_DIR}/lib${TARGET_NAME}.a" ${libfiles})
+  endif()
+  if(LINUX
+  )# general UNIX: use "ar" to extract objects and re-add to a common lib
+    set(target_DIR ${CMAKE_CURRENT_BINARY_DIR}/${TARGET_NAME}.dir)
+    foreach(lib ${libs})
+      set(objlistfile ${target_DIR}/${lib}.objlist
+      )# list of objects in the input library
+      set(objdir ${target_DIR}/${lib}.objdir)
+      add_custom_command(
+        OUTPUT ${objdir}
+        COMMAND ${CMAKE_COMMAND} -E make_directory ${objdir}
+        DEPENDS ${lib})
+      # Extract the objects into objdir and record their names in objlistfile.
+      add_custom_command(
+        OUTPUT ${objlistfile}
+        COMMAND ${CMAKE_AR} -x "$<TARGET_FILE:${lib}>"
+        COMMAND ${CMAKE_AR} -t "$<TARGET_FILE:${lib}>" > ${objlistfile}
+        DEPENDS ${lib} ${objdir}
+        WORKING_DIRECTORY ${objdir})
+      list(APPEND target_OBJS "${objlistfile}")
+    endforeach()
+    # Make the generated dummy source file depended on all static input
+    # libs. If input lib changes,the source file is touched
+    # which causes the desired effect (relink).
+    add_custom_command(
+      OUTPUT ${target_SRCS}
+      COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
+      DEPENDS ${libs} ${target_OBJS})
+    # Generate dummy static lib
+    file(WRITE ${target_SRCS}
+         "const char *dummy_${TARGET_NAME} = \"${target_SRCS}\";")
+    add_library(${TARGET_NAME} STATIC ${target_SRCS})
+    target_link_libraries(${TARGET_NAME} ${libs_deps})
+    # Get the file name of the generated library
+    set(target_LIBNAME "$<TARGET_FILE:${TARGET_NAME}>")
+    # Add every extracted object below target_DIR to the archive, then
+    # refresh its symbol index.
+    add_custom_command(
+      TARGET ${TARGET_NAME}
+      POST_BUILD
+      COMMAND ${CMAKE_AR} crs ${target_LIBNAME} `find ${target_DIR} -name '*.o'`
+      COMMAND ${CMAKE_RANLIB} ${target_LIBNAME}
+      WORKING_DIRECTORY ${target_DIR})
+  endif()
+  if(WIN32)
+    # windows do not support gcc/nvcc combined compiling. Use msvc lib.exe to merge libs.
+    # Make the generated dummy source file depended on all static input
+    # libs. If input lib changes,the source file is touched
+    # which causes the desired effect (relink).
+    add_custom_command(
+      OUTPUT ${target_SRCS}
+      COMMAND ${CMAKE_COMMAND} -E touch ${target_SRCS}
+      DEPENDS ${libs})
+    # Generate dummy static lib
+    file(WRITE ${target_SRCS}
+         "const char *dummy_${TARGET_NAME} = \"${target_SRCS}\";")
+    add_library(${TARGET_NAME} STATIC ${target_SRCS})
+    target_link_libraries(${TARGET_NAME} ${libs_deps})
+    foreach(lib ${libs})
+      # Get the file names of the libraries to be merged
+      set(libfiles ${libfiles} $<TARGET_FILE:${lib}>)
+    endforeach()
+    # msvc will put library in directory of "/Release/xxxlib" by default
+    #       COMMAND cmake -E remove "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/${TARGET_NAME}.lib"
+    # The merged library is written to <current binary dir>/<CMAKE_BUILD_TYPE>.
+    add_custom_command(
+      TARGET ${TARGET_NAME}
+      POST_BUILD
+      COMMAND cmake -E make_directory
+              "${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}"
+      COMMAND
+        lib
+        /OUT:${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}/lib${TARGET_NAME}.lib
+        ${libfiles})
+  endif()
+endfunction()
+# Variant of the standard 'protobuf_generate_cpp()' with protobuf-lite support.
+#
+# Usage:
+#   paddle_protobuf_generate_cpp(<proto_srcs> <proto_hdrs> <proto_files>)
+#
+# For every proto file a rule is added that runs protoc with
+# -I${CMAKE_SOURCE_DIR} and --cpp_out ${CMAKE_BINARY_DIR}. The expected
+# <name>.pb.cc / <name>.pb.h paths (in the current binary directory) are marked
+# as GENERATED and returned to the caller through the variables named by the
+# first two arguments. Reports SEND_ERROR when no proto file is given.
+function(paddle_protobuf_generate_cpp SRCS HDRS)
+  if(NOT ARGN)
+    message(
+      SEND_ERROR
+        "Error: paddle_protobuf_generate_cpp() called without any proto files")
+    return()
+  endif()
+  set(_ppgc_all_cc)
+  set(_ppgc_all_h)
+  foreach(_ppgc_proto ${ARGN})
+    get_filename_component(_ppgc_abs ${_ppgc_proto} ABSOLUTE)
+    get_filename_component(_ppgc_stem ${_ppgc_proto} NAME_WE)
+    set(_ppgc_cc "${CMAKE_CURRENT_BINARY_DIR}/${_ppgc_stem}.pb.cc")
+    set(_ppgc_h "${CMAKE_CURRENT_BINARY_DIR}/${_ppgc_stem}.pb.h")
+    list(APPEND _ppgc_all_cc "${_ppgc_cc}")
+    list(APPEND _ppgc_all_h "${_ppgc_h}")
+    add_custom_command(
+      OUTPUT "${_ppgc_cc}" "${_ppgc_h}"
+      COMMAND ${CMAKE_COMMAND} -E make_directory "${CMAKE_CURRENT_BINARY_DIR}"
+      COMMAND ${PROTOBUF_PROTOC_EXECUTABLE} -I${CMAKE_SOURCE_DIR} --cpp_out
+              "${CMAKE_BINARY_DIR}" ${_ppgc_abs}
+      DEPENDS ${_ppgc_abs} protoc
+      COMMENT "Running C++ protocol buffer compiler on ${_ppgc_proto}"
+      VERBATIM)
+  endforeach()
+  set_source_files_properties(${_ppgc_all_cc} ${_ppgc_all_h}
+                              PROPERTIES GENERATED TRUE)
+  # Hand both lists back to the caller.
+  set(${SRCS}
+      ${_ppgc_all_cc}
+      PARENT_SCOPE)
+  set(${HDRS}
+      ${_ppgc_all_h}
+      PARENT_SCOPE)
+endfunction()
+# cinn_proto_library(<target> SRCS <proto>... [DEPS <dep>...])
+#
+# Generates C++ sources from the given proto files and builds them into
+# <target> (linked against DEPS and protobuf). The generated file lists are
+# returned to the caller as <target>_HDRS and <target>_SRCS.
+function(cinn_proto_library TARGET_NAME)
+  # Defined explicitly so that a variable named `options` in the calling scope
+  # cannot leak into the argument parsing below.
+  set(options "")
+  set(oneValueArgs "")
+  set(multiValueArgs SRCS DEPS)
+  cmake_parse_arguments(cinn_proto_library "${options}" "${oneValueArgs}"
+                        "${multiValueArgs}" ${ARGN})
+  set(proto_srcs)
+  set(proto_hdrs)
+  paddle_protobuf_generate_cpp(proto_srcs proto_hdrs ${cinn_proto_library_SRCS})
+  cinn_cc_library(${TARGET_NAME} SRCS ${proto_srcs} DEPS
+                  ${cinn_proto_library_DEPS} protobuf)
+  set("${TARGET_NAME}_HDRS"
+      ${proto_hdrs}
+      PARENT_SCOPE)
+  set("${TARGET_NAME}_SRCS"
+      ${proto_srcs}
+      PARENT_SCOPE)
+endfunction()
+# common_link(<target>)
+#
+# Links the optional tooling libraries into <target>: gperftools' profiler when
+# WITH_PROFILER is set and jemalloc when WITH_JEMALLOC is set.
+function(common_link TARGET_NAME)
+  set(optional_libs "")
+  if(WITH_PROFILER)
+    list(APPEND optional_libs gperftools::profiler)
+  endif()
+  if(WITH_JEMALLOC)
+    list(APPEND optional_libs jemalloc::jemalloc)
+  endif()
+  if(optional_libs)
+    target_link_libraries(${TARGET_NAME} ${optional_libs})
+  endif()
+endfunction()
+# This method is borrowed from Paddle-Lite.
+#
+# download_and_uncompress(<install_dir> <url> <filename>)
+#
+# Declares an external project that downloads <url>/<filename> with wget into
+# <install_dir> and unpacks it with `cmake -E tar xzf`. There are no configure,
+# build, update or install steps.
+# NOTE(review): wget is invoked with --no-check-certificate, so the server
+# certificate is not verified - confirm this is acceptable.
+# NOTE(review): relies on ExternalProject_Add being available; the include of
+# the ExternalProject module is not part of this file - confirm.
+function(download_and_uncompress INSTALL_DIR URL FILENAME)
+  message(STATUS "Download inference test stuff from ${URL}/${FILENAME}")
+  # '-', '%' and '.' are replaced so the file name can be used in a target name.
+  string(REGEX REPLACE "[-%.]" "_" FILENAME_EX ${FILENAME})
+  set(EXTERNAL_PROJECT_NAME "extern_lite_download_${FILENAME_EX}")
+  ExternalProject_Add(
+    ${EXTERNAL_PROJECT_NAME}
+    ${EXTERNAL_PROJECT_LOG_ARGS}
+    PREFIX ${INSTALL_DIR}
+    DOWNLOAD_COMMAND
+      wget --no-check-certificate -q -O ${INSTALL_DIR}/${FILENAME}
+      ${URL}/${FILENAME} && ${CMAKE_COMMAND} -E tar xzf
+      ${INSTALL_DIR}/${FILENAME}
+    DOWNLOAD_DIR ${INSTALL_DIR}
+    DOWNLOAD_NO_PROGRESS 1
+    CONFIGURE_COMMAND ""
+    BUILD_COMMAND ""
+    UPDATE_COMMAND ""
+    INSTALL_COMMAND "")
+endfunction()
+# gather_srcs(<group> SRCS <file>...)
+#
+# Appends each listed file, prefixed with the current source directory, to the
+# INTERNAL cache variable named <group>.
+function(gather_srcs SRC_GROUP)
+  cmake_parse_arguments(gather "" "" "SRCS" ${ARGN})
+  foreach(src_file ${gather_SRCS})
+    set(${SRC_GROUP}
+        "${${SRC_GROUP}};${CMAKE_CURRENT_SOURCE_DIR}/${src_file}"
+        CACHE INTERNAL "")
+  endforeach()
+endfunction()
+# core_gather_headers()
+#
+# Globs the *.h files of the calling directory (paths expressed relative to
+# ${CMAKE_SOURCE_DIR}) and appends them to the INTERNAL cache variable
+# core_includes.
+function(core_gather_headers)
+  file(
+    GLOB header_list
+    LIST_DIRECTORIES false
+    RELATIVE ${CMAKE_SOURCE_DIR}
+    *.h)
+  foreach(hdr ${header_list})
+    set(core_includes
+        "${core_includes};${hdr}"
+        CACHE INTERNAL "")
+  endforeach()
+endfunction()
--- a/cmake/cinn/export.map
+++ b/cmake/cinn/export.map
+{
+	global:
+        RegisterKernels;
+	local:
+		*;
+};