Commit cba8f7f2 authored by Anthony Chang's avatar Anthony Chang
Browse files

Merge remote-tracking branch 'upstream/develop' into gemm-layernorm-4

parents cc50b687 b653c5eb
...@@ -45,4 +45,4 @@ build* ...@@ -45,4 +45,4 @@ build*
*~ *~
# GDB temporary files # GDB temporary files
.gdb_history .gdb_history
\ No newline at end of file
...@@ -7,7 +7,8 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake") ...@@ -7,7 +7,8 @@ list(APPEND CMAKE_MODULE_PATH "${PROJECT_SOURCE_DIR}/cmake")
enable_testing() enable_testing()
find_package(ROCM REQUIRED PATHS /opt/rocm) set(ROCM_SYMLINK_LIBS OFF)
find_package(ROCM 0.8 REQUIRED PATHS /opt/rocm)
include(ROCMInstallTargets) include(ROCMInstallTargets)
include(ROCMPackageConfigHelpers) include(ROCMPackageConfigHelpers)
...@@ -16,7 +17,7 @@ include(ROCMInstallSymlinks) ...@@ -16,7 +17,7 @@ include(ROCMInstallSymlinks)
include(ROCMCreatePackage) include(ROCMCreatePackage)
include(CheckCXXCompilerFlag) include(CheckCXXCompilerFlag)
rocm_setup_version(VERSION 1.0.0) rocm_setup_version(VERSION 0.2.0)
include(TargetFlags) include(TargetFlags)
list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip) list(APPEND CMAKE_PREFIX_PATH ${CMAKE_INSTALL_PREFIX} ${CMAKE_INSTALL_PREFIX}/llvm ${CMAKE_INSTALL_PREFIX}/hip /opt/rocm /opt/rocm/llvm /opt/rocm/hip)
...@@ -70,7 +71,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH ) ...@@ -70,7 +71,6 @@ if( DEFINED CK_OVERRIDE_HIP_VERSION_PATCH )
endif() endif()
message(STATUS "Build with HIP ${HIP_VERSION}") message(STATUS "Build with HIP ${HIP_VERSION}")
rocm_create_package( rocm_create_package(
NAME composablekernel NAME composablekernel
DESCRIPTION "High Performance Composable Kernel for AMD GPUs" DESCRIPTION "High Performance Composable Kernel for AMD GPUs"
...@@ -78,10 +78,6 @@ rocm_create_package( ...@@ -78,10 +78,6 @@ rocm_create_package(
LDCONFIG LDCONFIG
) )
## half
set(HALF_INCLUDE_DIR "${PROJECT_SOURCE_DIR}/external/include/half")
message("HALF_INCLUDE_DIR: ${HALF_INCLUDE_DIR}")
## tidy ## tidy
include(EnableCompilerWarnings) include(EnableCompilerWarnings)
set(CK_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name) set(CK_TIDY_ERRORS ERRORS * -readability-inconsistent-declaration-parameter-name)
...@@ -229,7 +225,6 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin) ...@@ -229,7 +225,6 @@ set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/bin)
include_directories(BEFORE include_directories(BEFORE
${PROJECT_SOURCE_DIR}/include ${PROJECT_SOURCE_DIR}/include
${PROJECT_BINARY_DIR}/include
${PROJECT_SOURCE_DIR}/library/include ${PROJECT_SOURCE_DIR}/library/include
) )
...@@ -243,6 +238,11 @@ message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") ...@@ -243,6 +238,11 @@ message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR}) add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
rocm_package_setup_component(tests
LIBRARY_NAME composablekernel
PACKAGE_NAME tests # Prevent -static suffix on package name
)
add_subdirectory(library) add_subdirectory(library)
add_subdirectory(example) add_subdirectory(example)
add_subdirectory(test) add_subdirectory(test)
...@@ -264,8 +264,19 @@ configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in ...@@ -264,8 +264,19 @@ configure_package_config_file(${CMAKE_CURRENT_SOURCE_DIR}/Config.cmake.in
NO_CHECK_REQUIRED_COMPONENTS_MACRO NO_CHECK_REQUIRED_COMPONENTS_MACRO
) )
install(FILES rocm_install(FILES
"${CMAKE_CURRENT_BINARY_DIR}/composable_kernelConfig.cmake" "${CMAKE_CURRENT_BINARY_DIR}/composable_kernelConfig.cmake"
"${CMAKE_CURRENT_BINARY_DIR}/composable_kernelConfigVersion.cmake" "${CMAKE_CURRENT_BINARY_DIR}/composable_kernelConfigVersion.cmake"
DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/composable_kernel
) )
set(CPACK_RESOURCE_FILE_LICENSE "${CMAKE_CURRENT_SOURCE_DIR}/LICENSE")
set(CPACK_RPM_PACKAGE_LICENSE "MIT")
rocm_create_package(
NAME composablekernel
DESCRIPTION "High Performance Composable Kernel for AMD GPUs"
MAINTAINER "MIOpen Kernels Dev Team <dl.MIOpen@amd.com>"
LDCONFIG
HEADER_ONLY
)
...@@ -88,3 +88,8 @@ ADD rbuild.ini /rbuild.ini ...@@ -88,3 +88,8 @@ ADD rbuild.ini /rbuild.ini
ADD dev-requirements.txt dev-requirements.txt ADD dev-requirements.txt dev-requirements.txt
RUN rbuild prepare -s develop -d $PREFIX RUN rbuild prepare -s develop -d $PREFIX
RUN groupadd -f render RUN groupadd -f render
# Install the new rocm-cmake version
RUN git clone -b master https://github.com/RadeonOpenCompute/rocm-cmake.git && \
cd rocm-cmake && mkdir build && cd build && \
cmake .. && cmake --build . && cmake --build . --target install
...@@ -379,23 +379,23 @@ pipeline { ...@@ -379,23 +379,23 @@ pipeline {
} }
} }
} }
stage("Client App") //stage("Client App")
{ //{
parallel // parallel
{ // {
stage("Run Client App") // stage("Run Client App")
{ // {
agent{ label rocmnode("gfx908")} // agent{ label rocmnode("gfx908")}
environment{ // environment{
setup_args = """ -D -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " """ // setup_args = """ -D -DBUILD_DEV=Off -DCMAKE_INSTALL_PREFIX=../install CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " """
execute_args = """ cd ../test/client_app && rm -rf build && mkdir build && cd build && cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" .. && make """ // execute_args = """ cd ../test/client_app && rm -rf build && mkdir build && cd build && cmake -DCMAKE_PREFIX_PATH="${env.WORKSPACE}/install;/opt/rocm" .. && make """
} // }
steps{ // steps{
buildHipClangJobAndReboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local') // buildHipClangJobAndReboot(setup_args: setup_args, config_targets: "install", no_reboot:true, build_type: 'Release', execute_cmd: execute_args, prefixpath: '/usr/local')
} // }
} // }
} // }
} //}
stage("Performance Tests") stage("Performance Tests")
{ {
parallel parallel
......
Copyright (c) 2018- , Advanced Micro Devices, Inc. (Chao Liu, Jing Zhang)
Copyright (c) 2019- , Advanced Micro Devices, Inc. (Letao Qin, Qianfeng Zhang, Liang Huang, Shaojie Wang)
Copyright (c) 2022- , Advanced Micro Devices, Inc. (Anthony Chang, Chunyu Lai, Illia Silin, Adam Osewski, Poyen Chen, Jehandad Khan)
Copyright (c) 2019-2021, Advanced Micro Devices, Inc. (Hanwen Chang)
Copyright (c) 2019-2020, Advanced Micro Devices, Inc. (Tejash Shah)
Copyright (c) 2020 , Advanced Micro Devices, Inc. (Xiaoyan Zhou)
Copyright (c) 2021-2022, Advanced Micro Devices, Inc. (Jianfeng Yan)
SPDX-License-Identifier: MIT
Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
...@@ -6,10 +6,13 @@ docker run \ ...@@ -6,10 +6,13 @@ docker run \
--group-add sudo \ --group-add sudo \
-w /root/workspace \ -w /root/workspace \
-v ${PATH_TO_LOCAL_WORKSPACE}:/root/workspace \ -v ${PATH_TO_LOCAL_WORKSPACE}:/root/workspace \
rocm/tensorflow:rocm4.3.1-tf2.6-dev \ rocm/tensorflow:rocm5.1-tf2.6-dev \
/bin/bash /bin/bash
``` ```
# Install the new rocm-cmake version
https://github.com/RadeonOpenCompute/rocm-cmake
## Build ## Build
```bash ```bash
mkdir build && cd build mkdir build && cd build
...@@ -34,7 +37,7 @@ Instructions for running each individual examples are under ```example/``` ...@@ -34,7 +37,7 @@ Instructions for running each individual examples are under ```example/```
## Tests ## Tests
```bash ```bash
make -j tests make -j examples tests
make test make test
``` ```
......
...@@ -8,7 +8,7 @@ endif() ...@@ -8,7 +8,7 @@ endif()
message(STATUS "Fetching GoogleTest") message(STATUS "Fetching GoogleTest")
list(APPEND GTEST_CMAKE_CXX_FLAGS list(APPEND GTEST_CMAKE_CXX_FLAGS
-Wno-undef -Wno-undef
-Wno-reserved-identifier -Wno-reserved-identifier
-Wno-global-constructors -Wno-global-constructors
...@@ -31,7 +31,11 @@ FetchContent_Declare( ...@@ -31,7 +31,11 @@ FetchContent_Declare(
# Will be necessary for windows build # Will be necessary for windows build
# set(gtest_force_shared_crt ON CACHE BOOL "" FORCE) # set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest) FetchContent_GetProperties(googletest)
if(NOT googletest_POPULATED)
FetchContent_Populate(googletest)
add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
target_compile_options(gtest PRIVATE ${GTEST_CMAKE_CXX_FLAGS}) target_compile_options(gtest PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
target_compile_options(gtest_main PRIVATE ${GTEST_CMAKE_CXX_FLAGS}) target_compile_options(gtest_main PRIVATE ${GTEST_CMAKE_CXX_FLAGS})
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device.hpp"
#include "host_tensor.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_dl.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device.hpp"
#include "host_tensor.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_dl.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_dl.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device.hpp"
#include "host_tensor.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_dl.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "device.hpp"
#include "host_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "element_wise_operation.hpp" #include "ck/library/utility/check_err.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "element_wise_operation.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_xdl.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "device_gemm_xdl.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "element_wise_operation.hpp" #include "ck/library/utility/check_err.hpp"
#include "reference_gemm.hpp"
#include "gemm_specialization.hpp"
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_xdl_cshuffle.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "device_gemm_xdl_cshuffle.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "reference_gemm.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "gemm_specialization.hpp"
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_xdl_c_shuffle_bias_2d.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "print.hpp"
#include "device.hpp" #include "ck/library/utility/check_err.hpp"
#include "host_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "host_gemm.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "device_tensor.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm_bias_2d.hpp"
#include "device_base.hpp"
#include "device_gemm_xdl_c_shuffle_bias_2d.hpp"
#include "element_wise_operation.hpp"
#include "reference_gemm_bias_2d.hpp"
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include "ck/ck.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_multiple_d_xdl_cshuffle.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "gemm_specialization.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "device_gemm_multiple_d_xdl_cshuffle.hpp" #include "ck/library/utility/check_err.hpp"
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include "ck/ck.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/gemm_specialization.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/device/device_gemm_multiple_d_xdl_cshuffle.hpp"
#include "host_tensor.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "reference_gemm.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "gemm_specialization.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_gemm.hpp"
#include "device_gemm_multiple_d_xdl_cshuffle.hpp" #include "ck/library/utility/check_err.hpp"
template <ck::index_t... Is> template <ck::index_t... Is>
using S = ck::Sequence<Is...>; using S = ck::Sequence<Is...>;
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "conv_util.hpp"
#include "device.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_conv2d_fwd_xdl_c_shuffle_bias_activation_nhwc_kyxc_nhwk.hpp" #include "ck/library/utility/conv_util.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "host_tensor.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation.hpp"
#include "reference_conv_fwd_bias_activation.hpp"
#include "tensor_layout.hpp"
namespace { namespace {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <initializer_list> #include <initializer_list>
#include <cstdlib> #include <cstdlib>
#include <stdlib.h>
#include <half.hpp> #include "ck/ck.hpp"
#include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "check_err.hpp" #include "ck/tensor_operation/gpu/device/device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "conv_util.hpp"
#include "device.hpp" #include "ck/library/utility/check_err.hpp"
#include "device_conv2d_fwd_xdl_c_shuffle_bias_activation_add_nhwc_kyxc_nhwk.hpp" #include "ck/library/utility/conv_util.hpp"
#include "device_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "element_wise_operation.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "host_tensor.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd_bias_activation_add.hpp"
#include "reference_conv_fwd_bias_activation_add.hpp"
#include "tensor_layout.hpp"
namespace { namespace {
......
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include <cstdlib> #include <cstdlib>
#include <iostream> #include <iostream>
#include <numeric> #include <numeric>
#include <type_traits> #include <type_traits>
#include "check_err.hpp" #include "ck/ck.hpp"
#include "config.hpp" #include "ck/tensor_operation/gpu/device/tensor_layout.hpp"
#include "conv_util.hpp" #include "ck/tensor_operation/gpu/device/device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp"
#include "device.hpp" #include "ck/tensor_operation/gpu/element/element_wise_operation.hpp"
#include "device_tensor.hpp"
#include "device_convnd_fwd_xdl_nhwc_kyxc_nhwk.hpp" #include "ck/library/utility/check_err.hpp"
#include "element_wise_operation.hpp" #include "ck/library/utility/conv_util.hpp"
#include "host_tensor.hpp" #include "ck/library/host_tensor/device_memory.hpp"
#include "host_tensor_generator.hpp" #include "ck/library/host_tensor/host_tensor.hpp"
#include "reference_conv_fwd.hpp" #include "ck/library/host_tensor/host_tensor_generator.hpp"
#include "tensor_layout.hpp" #include "ck/library/reference_tensor_operation/cpu/reference_conv_fwd.hpp"
namespace { namespace {
...@@ -291,8 +294,8 @@ int main(int argc, char* argv[]) ...@@ -291,8 +294,8 @@ int main(int argc, char* argv[])
float tflops = static_cast<float>(flop) / 1.E9 / ave_time; float tflops = static_cast<float>(flop) / 1.E9 / ave_time;
float gb_per_sec = num_btype / 1.E6 / ave_time; float gb_per_sec = num_btype / 1.E6 / ave_time;
std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s, " << conv->GetTypeString() std::cout << "Perf: " << ave_time << " ms, " << tflops << " TFlops, " << gb_per_sec << " GB/s, "
<< std::endl; << conv->GetTypeString() << std::endl;
if(do_verification) if(do_verification)
{ {
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment