Commit d22dbec2 authored by zhoux's avatar zhoux
Browse files

Initial commit: release hytlass-0.1.0

parents
# HYGON HYTLASS Changelog
# HYTLASS 0.1.0新增
HYTLASS 0.1.0是HYTLASS的首次提交,其实现基于CUTLASS 3.5.0,具体地,提供了:
- HYTLASS兼容和支持CUTLASS 2.x实现:
- 支持BW(GFX936)及更早架构下的指令特性,包括支持TensorCore的mmac及ds_read_matrix原语。
- 支持基于这些架构指令特性下的矩阵乘法实现。
- 支持基于矩阵乘法模版实现的基于隐式矩阵乘法的卷积实现。
- HYTLASS兼容和支持CUTLASS 3.x及CuTe编程模型:
- 适配基于DCU架构对CUTLASS 3.x中的CuTe编程模型HuTe,目前已支持至BW平台的指令原语。
- 支持基于HuTe模型的矩阵乘法实现,包括MMA部分和Epilogue部分。
- 支持基于HuTe模型的Kernel调度、Tiling调度等计算任务调度架构,支持多种线程块调度优化策略。
- 十余个计算示例支持:
- 支持基于2.x的多种数据类型(TF32/FP16/BF16/I8/U8)的矩阵乘法、卷积及其融合算子实现。
- 支持基于Split-K、Stream-K等计算优化算法的矩阵乘法示例。
- 支持基于访问者模式的自定义尾声处理示例。
- 支持基于HuTe的矩阵乘法示例,包括BatchedGemm、GroupGemm等示例。
- 支持使用TensorCore加速的基于Block Ell格式的稀疏矩阵乘法示例。
- 工具链支持:
- 支持hytlass_profiler,用于细粒度问题参数下的kernel tuning。
This diff is collapsed.
# Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
find_package(HIP REQUIRED CONFIG PATHS ${DCU_TOOLKIT_ROOT_DIR})
################# RT #########################
find_library(
GALAXY_HIP galaxyhip
${DCU_TOOLKIT_ROOT_DIR}/lib
NO_DEFAULT_PATH
)
if(NOT TARGET hip::galaxyhip AND GALAXY_HIP)
message(STATUS "Found galaxyhip: True")
add_library(galaxyhip SHARED IMPORTED GLOBAL)
add_library(hip::galaxyhip ALIAS hipgalaxy)
set_property(
TARGET galaxyhip
PROPERTY IMPORTED_LOCATION
${GALAXY_HIP}
)
elseif(TARGET hip::galaxyhip)
message(STATUS "Found galaxyhip: True")
else()
message(STATUS "Found galaxyhip: True")
endif()
find_library(
HIPRTC_LIBRARY hiprtc
PATHS
${DCU_TOOLKIT_ROOT_DIR}/lib
NO_DEFAULT_PATH
)
if(NOT TARGET hiprtc AND HIPRTC_LIBRARY)
message(STATUS "Found hiprtc: True")
add_library(hiprtc SHARED IMPORTED GLOBAL)
add_library(hip::hiprtc ALIAS hiprtc)
set_property(
TARGET hiprtc
PROPERTY IMPORTED_LOCATION
${HIPRTC_LIBRARY}
)
elseif(TARGET hiprtc)
message(STATUS "Found hiprtc: True")
else()
message(STATUS "Found hiprtc: False")
endif()
include_directories(SYSTEM "${DCU_TOOLKIT_ROOT_DIR}/include")
# set hip property as *.cu
function(hytlass_correct_source_file_language_property)
foreach(File ${ARGN})
# add compile option -xhip while using clang++
if(File MATCHES ".*\.cu$")
# set_source_files_properties(${File} PROPERTIES COMPILE_FLAGS "-x hip")
set_source_files_properties(${File} PROPERTIES LANGUAGE CXX)
endif()
endforeach()
endfunction()
set(HYTLASS_UNITY_BUILD_ENABLED_INIT OFF)
set(HYTLASS_UNITY_BUILD_ENABLED ${HYTLASS_UNITY_BUILD_ENABLED_INIT} CACHE BOOL "Enable combined source compilation")
set(HYTLASS_UNITY_BUILD_BATCH_SIZE_INIT 16)
set(HYTLASS_UNITY_BUILD_BATCH_SIZE ${HYTLASS_UNITY_BUILD_BATCH_SIZE_INIT} CACHE STRING "Batch size for unified source files")
# set unify
function(hytlass_unify_source_files TARGET_ARGS_VAR)
set(options)
set(oneValueArgs BATCH_SOURCES BATCH_SIZE)
set(multiValueArgs)
cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
if (NOT DEFINED TARGET_ARGS_VAR)
message(FATAL_ERROR "TARGET_ARGS_VAR parameter is required")
endif()
if (__BATCH_SOURCES AND NOT DEFINED __BATCH_SIZE)
set(__BATCH_SIZE ${HYTLASS_UNITY_BUILD_BATCH_SIZE})
endif()
if (HYTLASS_UNITY_BUILD_ENABLED AND DEFINED __BATCH_SIZE AND __BATCH_SIZE GREATER 1)
set(HIP_FILE_ARGS)
set(TARGET_SOURCE_ARGS)
foreach(ARG ${__UNPARSED_ARGUMENTS})
if(${ARG} MATCHES ".*\.cu$")
list(APPEND HIP_FILE_ARGS ${ARG})
else()
list(APPEND TARGET_SOURCE_ARGS ${ARG})
endif()
endforeach()
list(LENGTH HIP_FILE_ARGS NUM_HIP_FILE_ARGS)
while(NUM_HIP_FILE_ARGS GREATER 0)
list(SUBLIST HIP_FILE_ARGS 0 ${__BATCH_SIZE} HIP_FILE_BATCH)
string(SHA256 HIP_FILE_BATCH_HASH "${HIP_FILE_BATCH}")
string(SUBSTRING ${HIP_FILE_BATCH_HASH} 0 12 HIP_FILE_BATCH_HASH)
set(BATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/${NAME}.unity.${HIP_FILE_BATCH_HASH}.cu)
message(STATUS "Generating ${BATCH_FILE}")
file(WRITE ${BATCH_FILE} "// Unity File - Auto Generated!\n")
foreach(HIP_FILE ${HIP_FILE_BATCH})
get_filename_component(HIP_FILE_ABS_PATH ${HIP_FILE} ABSOLUTE)
file(APPEND ${BATCH_FILE} "#include \"${HIP_FILE_ABS_PATH}\"\n")
endforeach()
list(APPEND TARGET_SOURCE_ARGS ${BATCH_FILE})
if (NUM_HIP_FILE_ARGS LESS_EQUAL __BATCH_SIZE)
break()
endif()
list(SUBLIST HIP_FILE_ARGS ${__BATCH_SIZE} -1 HIP_FILE_ARGS)
list(LENGTH HIP_FILE_ARGS NUM_HIP_FILE_ARGS)
endwhile()
else()
set(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})
endif()
set(${TARGET_ARGS_VAR} ${TARGET_SOURCE_ARGS} PARENT_SCOPE)
endfunction()
# unify -> set property -> add library
function(hytlass_add_library NAME)
set(options SKIP_GENCODE_FLAGS)
set(oneValueArgs EXPORT_NAME)
set(multiValueArgs)
cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
hytlass_unify_source_files(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})
hytlass_correct_source_file_language_property(${TARGET_SOURCE_ARGS})
add_library(${NAME} ${TARGET_SOURCE_ARGS} "")
hytlass_apply_standard_compile_options(${NAME})
if (NOT __SKIP_GENCODE_FLAGS)
hytlass_apply_hip_gencode_flags(${NAME})
endif()
target_compile_features(
${NAME}
INTERFACE
cxx_std_11
)
if(__EXPORT_NAME)
add_library(hygon::hytlass::${__EXPORT_NAME} ALIAS ${NAME})
set_target_properties(${NAME} PROPERTIES EXPORT_NAME ${__EXPORT_NAME})
endif()
endfunction()
function(hytlass_add_executable NAME)
set(options)
set(oneValueArgs)
set(multiValueArgs)
cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
hytlass_unify_source_files(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})
hytlass_correct_source_file_language_property(${TARGET_SOURCE_ARGS})
add_executable(${NAME} ${TARGET_SOURCE_ARGS})
hytlass_apply_standard_compile_options(${NAME})
hytlass_apply_hip_gencode_flags(${NAME})
target_compile_features(
${NAME}
INTERFACE
cxx_std_11
)
endfunction()
function(hytlass_target_sources NAME)
set(options)
set(oneValueArgs)
set(multiValueArgs)
cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})
hytlass_unify_source_files(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})
hytlass_correct_source_file_language_property(${TARGET_SOURCE_ARGS})
target_sources(${NAME} ${TARGET_SOURCE_ARGS})
endfunction()
Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------------
The following copyright statements and licenses apply to various open source software/model
packages (or portions thereof) that are distributed with this MUTLASS. MUTLASS that
includes this file does not necessarily use all the open source software packages referred
to below and may also only use portions of a given package. Some open source software
packages referred to below may have been modified by Moore Threads Technology Co., Ltd
-------------------------------------------------------------------------
cutlass
Copyright (c) 2017 - 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
SPDX-License-Identifier: BSD-3-Clause
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
3. Neither the name of the copyright holder nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-------------------------------------------------------------------------
googletest
Copyright 2008, Google Inc.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# HYTLASS 0.1.0
_HYTLASS 0.1.0 - 2025年12月_
HYTLASS(HYGON DCU Templates for Linear Algebra Subroutines)是一个用于在海光DCU架构下实现高性能矩阵乘法(GEMM)及其衍生计算的C++模板库,其设计思路来源于CUTLASS,将数据搬运、层次化结构等可动部件分解为可复用、模块化的软件组件,
并通过C++模板类进行抽象。
HYTLASS同时兼容了基于CUTLASS 2.x实现的基于C++模版的矩阵乘和卷积,以及基于CUTLASS 3.x引入的CuTe编程模型。
你可以参考[快速入门指南](media/docs/quickstart.md)来快速入门使用HYTLASS。
# HYTLASS 0.1.0新增
HYTLASS 0.1.0是HYTLASS的首次提交,其实现基于CUTLASS 3.5.0,具体地,提供了:
- HYTLASS兼容和支持CUTLASS 2.x实现:
- 支持BW(GFX936)及更早架构下的指令特性,包括支持TensorCore的mmac及ds_read_matrix原语。
- 支持基于这些架构指令特性下的矩阵乘法实现。
- 支持基于矩阵乘法模版实现的基于隐式矩阵乘法的卷积实现。
- HYTLASS兼容和支持CUTLASS 3.x及CuTe编程模型:
- 适配基于DCU架构对CUTLASS 3.x中的CuTe编程模型HuTe,目前已支持至BW平台的指令原语。
- 支持基于HuTe模型的矩阵乘法实现,包括MMA部分和Epilogue部分。
- 支持基于HuTe模型的Kernel调度、Tiling调度等计算任务调度架构,支持多种线程块调度优化策略。
- 十余个计算示例支持(详见[示例](examples)):
- 支持基于2.x的多种数据类型(TF32/FP16/BF16/I8/U8)的矩阵乘法、卷积及其融合算子实现。
- 支持基于Split-K、Stream-K等计算优化算法的矩阵乘法示例。
- 支持基于访问者模式的自定义尾声处理示例。
- 支持基于HuTe的矩阵乘法示例,包括BatchedGemm、GroupGemm等示例。
- 支持使用TensorCore加速的基于Block Ell格式的稀疏矩阵乘法示例。
- 工具链支持:
- 支持hytlass_profiler,用于细粒度问题参数下的kernel tuning。
# 编译HYTLASS
在其他项目中,HYTLASS作为头文件库不需要单独编译,用户将`include/`目录指定至头文件路径即可。
HYTLASS的单元测试、示例及工具链通过CMake编译,所需最低版本为3.19。
在HYTLASS项目根目录中创建一个build目录,然后执行cmake,你可以通过CMAKE选项`HYTLASS_HIPCC_ARCHS`指定编译的架构。
```bash
$ mkdir build && cd build
$ cmake .. -DHYTLASS_HIPCC_ARCHS=936 # compiles for DCU BW Architecture
```
你可以通过在build目录下使用make编译并执行`test_unit`来执行HYTLASS的单元测试。可以通过`-j`选项来并行执行make的流程。
```bash
$ make test_unit -j
...
...
...
[----------] Global test environment tear-down
[==========] 946 tests from 57 test cases ran. (10812 ms total)
[ PASSED ] 946 tests.
```
在所支持的硬件架构下,所有测试都应当通过,尽管不同硬件架构下的单元测试数量可能有所不同。
你也可以通过在build目录下,使用make编译`test_examples*`来执行所有示例或某个示例。
```bash
$ make test_examples -j
...
...
...
[100%] Built target test_examples_xxx
[100%] Built target test_examples
```
或在编译完成后,在`build/examples`目录下单独执行某个示例的可执行文件。
```bash
$ cd build/examples && ./00_hytlass_basic_gemm/gfx928_gemm_tensor_op
```
\ No newline at end of file
# Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
# Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Generated file
set(TEST_SETS_SUPPORTED @TEST_SETS_SUPPORTED@)
if (NOT DEFINED ENV{HYTLASS_TEST_SETS})
set(ENV{HYTLASS_TEST_SETS} @HYTLASS_DEFAULT_ACTIVE_TEST_SETS@)
endif()
foreach(TEST_SET_REQUESTED IN ITEMS $ENV{HYTLASS_TEST_SETS})
if (NOT TEST_SET_REQUESTED IN_LIST TEST_SETS_SUPPORTED)
message(STATUS "Skipping tests for @TEST_EXE_PATH@ as ${TEST_SET_REQUESTED} is not in the set of [${TEST_SETS_SUPPORTED}].")
return()
endif()
endforeach()
set(TEST_EXE_PATH @TEST_EXE_PATH@)
set(TEST_EXE_WORKING_DIRECTORY @TEST_EXE_WORKING_DIRECTORY@)
set(HYTLASS_USE_EXTENDED_ADD_TEST_FORMAT @TEST_USE_EXTENDED_FORMAT@)
if (DEFINED ENV{HYTLASS_TEST_EXECUTION_ENVIRONMENT})
set(_HYTLASS_TEST_EXECUTION_ENVIRONMENT $ENV{HYTLASS_TEST_EXECUTION_ENVIRONMENT})
else()
set(_HYTLASS_TEST_EXECUTION_ENVIRONMENT @HYTLASS_TEST_EXECUTION_ENVIRONMENT@)
endif()
@_INLINE_PER_TEST_CODE@
# Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
# Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
if (HYTLASS_USE_EXTENDED_ADD_TEST_FORMAT)
# The longform/extended format allows generator expressions to be
# expanded property and is useful in contexts where the files need
# to be immediately included into being-processed cmake code.
add_test(NAME @TEST_NAME@ COMMAND ${_HYTLASS_TEST_EXECUTION_ENVIRONMENT} "${TEST_EXE_PATH}" @TEST_COMMAND_OPTIONS@)
else()
add_test(@TEST_NAME@ ${_HYTLASS_TEST_EXECUTION_ENVIRONMENT} "${TEST_EXE_PATH}" @TEST_COMMAND_OPTIONS@)
endif()
if (TEST_EXE_WORKING_DIRECTORY)
set_tests_properties(@TEST_NAME@ PROPERTIES WORKING_DIRECTORY "${TEST_EXE_WORKING_DIRECTORY}")
endif()
set_tests_properties(@TEST_NAME@ PROPERTIES DISABLED @__DISABLE_TESTS@)
get_filename_component(HygonHytlass_CMAKE_DIR "${CMAKE_CURRENT_LIST_FILE}" PATH)
include(CMakeFindDependencyMacro)
if(TARGET hygon::hytlass::HYTLASS)
return()
endif()
include("${HygonHytlass_CMAKE_DIR}/HygonHytlassTargets.cmake")
# Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
# Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
set(CPACK_PACKAGE_NAME HygonHytlass)
set(CPACK_PACKAGE_VENDOR HYGON)
set(CPACK_PACKAGE_CONTACT info@hygon.com)
set(CPACK_PACKAGE_DESCRIPTION_SUMMARY "HYTLASS CUDA C++ Template Linear Algebra Library")
set(CPACK_PACKAGE_INSTALL_DIRECTORY ${CPACK_PACKAGE_NAME})
set(CPACK_PACKAGE_VERSION_MAJOR ${PROJECT_VERSION_MAJOR})
set(CPACK_PACKAGE_VERSION_MINOR ${PROJECT_VERSION_MINOR})
set(CPACK_PACKAGE_VERSION_PATCH ${PROJECT_VERSION_PATCH})
set(CPACK_VERBATIM_VARIABLES YES)
# set(CPACK_PACKAGE_DESCRIPTION_FILE ${CMAKE_CURRENT_LIST_DIR}/Description.txt)
# set(CPACK_RESOURCE_FILE_WELCOME ${CMAKE_CURRENT_LIST_DIR}/Welcome.txt)
# set(CPACK_RESOURCE_FILE_LICENSE ${CMAKE_CURRENT_LIST_DIR}/License.txt)
# set(CPACK_RESOURCE_FILE_README ${CMAKE_CURRENT_LIST_DIR}/Readme.txt)
include(CPack)
# Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
# Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
include(FetchContent)
set(GOOGLETEST_DIR "" CACHE STRING "Location of local GoogleTest repo to build against")
if(GOOGLETEST_DIR)
set(FETCHCONTENT_SOURCE_DIR_GOOGLETEST ${GOOGLETEST_DIR} CACHE STRING "GoogleTest source directory override")
endif()
FetchContent_Declare(
googletest
GIT_REPOSITORY https://github.com/google/googletest.git
GIT_TAG v1.14.0
)
FetchContent_GetProperties(googletest)
if(NOT googletest_POPULATED)
FetchContent_Populate(googletest)
if (MSVC)
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
endif()
add_subdirectory(${googletest_SOURCE_DIR} ${googletest_BINARY_DIR} EXCLUDE_FROM_ALL)
endif()
/***************************************************************************************************
* Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
* Copyright (c) 2017 - 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
/*! \file
\brief Basic HIP file for testing compiler flags.
*/
__device__ int inner()
{
return -1;
}
__global__ void test()
{
inner();
}
int main()
{
test<<<1,1>>>();
return 0;
}
/***************************************************************************************************
* Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
* Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
#pragma once
#define HYTLASS_BUILD @HYTLASS_VERSION_BUILD@
#define HYTLASS_REVISION "@HYTLASS_REVISION@"
# Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
hytlass_example_add_executable(
gfx928_gemm_tensor_op
gfx928_gemm_tensor_op.cu
)
hytlass_example_add_executable(
gfx928_gemm_tensor_op_mixed
gfx928_gemm_tensor_op_mixed.cu
)
This diff is collapsed.
This diff is collapsed.
# Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
hytlass_example_add_executable(
gfx928_serial_splitk_gemm
gfx928_serial_splitk_gemm.cu
)
\ No newline at end of file
/***************************************************************************************************
* Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
* SPDX-License-Identifier: BSD-3-Clause
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3. Neither the name of the copyright holder nor the names of its
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
**************************************************************************************************/
/**
This example shows how to use serial split-k version of matrix multiplication using functions and
data structures provided by HYTLASS.
Examples:
# Runs a serial split-K GEMM with the given problem size (use split_k_slices to set number of K slices)
$ ./gfx928_serial_splitk_gemm --m=5120 --n=5120 --k=8192 --alpha=1 --beta=0 --split_k_slices=2 --iterations=10
*/
#include <fstream>
#include <iostream>
#include "hip/hip_runtime.h"
#include "hytlass/hytlass.h"
#include "hytlass/gemm/device/gemm.h"
#include "hytlass/gemm/device/gemm_universal.h"
#include "hytlass/util/command_line.h"
#include "hytlass/util/host_tensor.h"
#include "hytlass/util/reference/device/gemm.h"
#include "hytlass/util/reference/host/tensor_compare.h"
#include "hytlass/util/reference/host/tensor_copy.h"
#include "hytlass/util/reference/host/tensor_fill.h"
#include "hytlass/util/tensor_view_io.h"
#include "helper.h"
/////////////////////////////////////////////////////////////////////////////////////////////////
/// Result structure
struct Result {
double runtime_ms;
double gflops;
hytlass::Status status;
hipError_t error;
bool passed;
Result(
double runtime_ms = 0,
double gflops = 0,
hytlass::Status status = hytlass::Status::kSuccess,
hipError_t error = hipSuccess)
:
runtime_ms(runtime_ms), gflops(gflops), status(status), error(error), passed(true)
{}
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// Command line options parsing
struct Options {
bool help;
hytlass::gemm::GemmCoord problem_size;
float alpha;
float beta;
bool reference_check;
int iterations;
int split_k_slices;
Options():
help(false),
problem_size({8192, 8192, 2048}),
reference_check(true),
iterations(10),
split_k_slices(1),
alpha(1),
beta()
{}
bool valid() {
return true;
}
// Parses the command line
void parse(int argc, char const **args) {
hytlass::CommandLine cmd(argc, args);
if (cmd.check_cmd_line_flag("help")) {
help = true;
}
cmd.get_cmd_line_argument("m", problem_size.m());
cmd.get_cmd_line_argument("n", problem_size.n());
cmd.get_cmd_line_argument("k", problem_size.k());
cmd.get_cmd_line_argument("alpha", alpha);
cmd.get_cmd_line_argument("beta", beta);
cmd.get_cmd_line_argument("split_k_slices", split_k_slices);
cmd.get_cmd_line_argument("iterations", iterations);
}
/// Prints the usage statement.
std::ostream &print_usage(std::ostream &out) const {
out << "01_hytlass_serial_splitk_gemm\n\n"
<< "Options:\n\n"
<< " --help If specified, displays this usage statement.\n\n"
<< " --m=<int> GEMM M dimension\n"
<< " --n=<int> GEMM N dimension\n"
<< " --k=<int> GEMM K dimension\n"
<< " --alpha=<f32> Epilogue scalar alpha\n"
<< " --beta=<f32> Epilogue scalar beta\n\n"
<< " --split_k_slices=<int> Split-K factor to emulate\n\n"
<< " --iterations=<int> Number of profiling iterations to perform.\n\n";
out << "\n\nExamples:\n\n"
<< "$ ./examples/01_hytlass_serial_splitk_gemm/gfx928_serial_splitk_gemm --m=1024 --n=512 --k=1024 \\\n"
<< " --alpha=2 --beta=0.707 --split_k_slices=2 \n\n";
return out;
}
/// Compute performance in GFLOP/s
double gflops(double runtime_s) const {
int64_t fmas = problem_size.product();
return 2.0 * double(fmas) / double(1.0e9) / runtime_s;
}
};
///////////////////////////////////////////////////////////////////////////////////////////////////
// The code section below describes datatype for input, output matrices and computation between
// elements in input matrices.
using ElementAccumulator = float; // <- data type of accumulator
using ElementComputeEpilogue = ElementAccumulator; // <- data type of epilogue operations
using ElementInputA = hytlass::half_t; // <- data type of elements in input matrix A
using ElementInputB = hytlass::half_t; // <- data type of elements in input matrix B
using ElementOutput = hytlass::half_t; // <- data type of elements in output matrix D
constexpr int kAlignmentA = 128 / hytlass::sizeof_bits<ElementInputA>::value;
constexpr int kAlignmentB = 128 / hytlass::sizeof_bits<ElementInputB>::value;
// The code section below describes matrix layout of input and output matrices. Column Major for
// Matrix A, Row Major for Matrix B and Row Major for Matrix C
using LayoutInputA = hytlass::layout::ColumnMajor;
using LayoutInputB = hytlass::layout::RowMajor;
using LayoutOutput = hytlass::layout::RowMajor;
// This code section describes whether you want to use tensor cores or regular SIMT cores on GPU SM
using MMAOp = hytlass::arch::OpClassTensorOp;
// This code section describes GFX architecture number
using SmArch = hytlass::arch::Gfx928;
// This code section describes the tile size a thread block will compute
using ShapeMMAThreadBlock =
hytlass::gemm::GemmShape<128, 128, 32>; // <- threadblock tile M = 128, N = 128, K = 16
// This code section describes tile size a warp will compute
using ShapeMMAWarp = hytlass::gemm::GemmShape<64, 64, 32>; // <- warp tile M = 64, N = 64, K = 16
// This code section describes the size of MMA op
using ShapeMMAOp = hytlass::gemm::GemmShape<16, 16, 16>;
// This code section describes how threadblocks are scheduled on GPU
using SwizzleThreadBlock = hytlass::gemm::threadblock::GemmIdentityThreadblockSwizzle<>;
static bool const kSplitKSerial = true;
// This code section describes the epilogue part of the kernel
using EpilogueOp = hytlass::epilogue::thread::LinearCombination<
ElementOutput, // <- data type of output matrix
128 / hytlass::sizeof_bits<ElementOutput>::value, // <- the number of elements per vectorized
// memory access. For a byte, it's 16
// elements. This becomes the vector width of
// math instructions in the epilogue too
ElementAccumulator, // <- data type of accumulator
ElementComputeEpilogue>; // <- data type for alpha/beta in linear combination function
// Number of pipelines you want to use
constexpr int NumStages = 1;
using Gemm = hytlass::gemm::device::Gemm<ElementInputA,
LayoutInputA,
ElementInputB,
LayoutInputB,
ElementOutput,
LayoutOutput,
ElementAccumulator,
MMAOp,
SmArch,
ShapeMMAThreadBlock,
ShapeMMAWarp,
ShapeMMAOp,
EpilogueOp,
SwizzleThreadBlock,
NumStages,
kAlignmentA,
kAlignmentB,
kSplitKSerial>;
int run(Options &options) {
// Create a tuple of problem size for matrix multiplication
hytlass::gemm::GemmCoord problem_size = options.problem_size;
// Initialize tensors using HYTLASS helper functions
hytlass::HostTensor<ElementInputA, LayoutInputA> tensor_a(
problem_size.mk()); // <- Create matrix A with dimensions M x K
hytlass::HostTensor<ElementInputB, LayoutInputB> tensor_b(
problem_size.kn()); // <- Create matrix B with dimensions K x N
hytlass::HostTensor<ElementOutput, LayoutOutput> tensor_c(
problem_size.mn()); // <- Create matrix C with dimensions M x N
hytlass::HostTensor<ElementOutput, LayoutOutput> tensor_d(
problem_size.mn()); // <- Create matrix D with dimensions M x N used to store output from
// HYTLASS kernel
hytlass::HostTensor<ElementOutput, LayoutOutput> tensor_ref_d(
problem_size.mn()); // <- Create matrix D with dimensions M x N used to store output from
// reference kernel
// Fill input and output matrices on host using HYTLASS helper functions
hytlass::reference::host::TensorFillRandomUniform(
tensor_a.host_view(),
1,
ElementInputA(4),
ElementInputA(-4),
0); // <- Fill matrix A on host with uniform-distribution random data
hytlass::reference::host::TensorFillRandomUniform(
tensor_b.host_view(),
1,
ElementInputB(4),
ElementInputB(-4),
0); // <- Fill matrix B on host with uniform-distribution random data
// hytlass::reference::host::TensorFillSequential(
// tensor_a.host_view()); // <- Fill matrix A on host with uniform-distribution random data
// hytlass::reference::host::TensorFillSequential(
// tensor_b.host_view()); // <- Fill matrix B on host with uniform-distribution random data
hytlass::reference::host::TensorFillRandomUniform(
tensor_c.host_view(),
1,
ElementOutput(4),
ElementOutput(-4),
0); // <- Fill matrix C on host with uniform-distribution random data
hytlass::reference::host::TensorFill(tensor_d.host_view()); // <- fill matrix D on host with zeros
hytlass::reference::host::TensorFill(
tensor_ref_d.host_view()); // <- fill matrix D for reference on host with zeros
// Copy data from host to GPU
tensor_a.sync_device();
tensor_b.sync_device();
tensor_c.sync_device();
tensor_d.sync_device();
tensor_ref_d.sync_device();
// Initialize alpha and beta for dot product computation
ElementComputeEpilogue alpha = ElementComputeEpilogue(options.alpha);
ElementComputeEpilogue beta = ElementComputeEpilogue(options.beta);
// Split K dimension into 1 partitions
int split_k_slices = options.split_k_slices;
// Create a tuple of gemm kernel arguments. This is later passed as arguments to launch
// instantiated HYTLASS kernel
typename Gemm::Arguments arguments{problem_size, // <- problem size of matrix multiplication
tensor_a.device_ref(), // <- reference to matrix A on device
tensor_b.device_ref(), // <- reference to matrix B on device
tensor_c.device_ref(), // <- reference to matrix C on device
tensor_d.device_ref(), // <- reference to matrix D on device
{alpha, beta}, // <- tuple of alpha and beta
split_k_slices}; // <- k-dimension split factor
// Using the arguments, query for extra workspace required for matrix multiplication computation
size_t workspace_size = Gemm::get_workspace_size(arguments);
// Allocate workspace memory
hytlass::device_memory::allocation<uint8_t> workspace(workspace_size);
// Instantiate HYTLASS kernel depending on templates
Gemm gemm_op;
// Check the problem size is supported or not
hytlass::Status status = gemm_op.can_implement(arguments);
HYTLASS_CHECK(status);
// Initialize HYTLASS kernel with arguments and workspace pointer
status = gemm_op.initialize(arguments, workspace.get());
HYTLASS_CHECK(status);
// Launch initialized HYTLASS kernel
status = gemm_op();
HYTLASS_CHECK(status);
// Result structure
Result result;
for (int i=0; i<10; i++) {
status = gemm_op();
}
HYTLASS_CHECK(status);
//
// Construct events
//
hipEvent_t events[2];
for (auto &event : events) {
result.error = hipEventCreate(&event);
if (result.error != hipSuccess) {
std::cerr << "hipEventCreate() failed: " << hipGetErrorString(result.error) << std::endl;
return -1;
}
}
// Record an event at the start of a series of GEMMs
result.error = hipEventRecord(events[0]);
if (result.error != hipSuccess) {
std::cerr << "hipEventRecord() failed: " << hipGetErrorString(result.error) << std::endl;
return -1;
}
//
// Run profiling loop
//
for (int iter = 0; iter < options.iterations; ++iter) {
// Launch initialized HYTLASS kernel
status = gemm_op();
HYTLASS_CHECK(status);
}
//
// Stop profiling loop
//
// Record an event when the GEMMs are complete
result.error = hipEventRecord(events[1]);
if (result.error != hipSuccess) {
std::cerr << "hipEventRecord() failed: " << hipGetErrorString(result.error) << std::endl;
return -1;
}
// Wait for work on the device to complete.
result.error = hipEventSynchronize(events[1]);
if (result.error != hipSuccess) {
std::cerr << "hipEventSynchronize() failed: " << hipGetErrorString(result.error) << std::endl;
return -1;
}
// Measure elapsed runtime
float runtime_ms = 0;
result.error = hipEventElapsedTime(&runtime_ms, events[0], events[1]);
if (result.error != hipSuccess) {
std::cerr << "hipEventElapsed() failed: " << hipGetErrorString(result.error) << std::endl;
return -1;
}
// Compute average runtime and GFLOPs.
result.runtime_ms = double(runtime_ms) / double(options.iterations);
result.gflops = options.gflops(result.runtime_ms / 1000.0);
// Cleanup
for (auto event : events) {
(void)hipEventDestroy(event);
}
if (options.reference_check) {
// Create instantiation for device reference gemm kernel
hytlass::reference::device::Gemm<ElementInputA,
LayoutInputA,
ElementInputB,
LayoutInputB,
ElementOutput,
LayoutOutput,
ElementComputeEpilogue,
ElementComputeEpilogue>
gemm_device;
// Launch device reference gemm kernel
gemm_device(problem_size,
alpha,
tensor_a.device_ref(),
tensor_b.device_ref(),
beta,
tensor_c.device_ref(),
tensor_ref_d.device_ref());
// Wait for kernels to finish
(void)hipDeviceSynchronize();
// Copy output data from HYTLASS and reference kernel to host for comparison
tensor_d.sync_host();
tensor_ref_d.sync_host();
// Check if output from HYTLASS kernel and reference kernel are equal or not
ElementOutput eps(0.05);
const ElementOutput non_zero_floor(1e-6f);
result.passed = hytlass::reference::host::TensorRelativelyEquals(
tensor_ref_d.host_view(), tensor_d.host_view(), eps, non_zero_floor);
}
if (!result.passed) {
std::stringstream fname;
fname << "error_Gemm_device_" << problem_size.m() << "x" << problem_size.n() << "x"
<< problem_size.k() << "_" << ShapeMMAThreadBlock{}.kM << "_" << ShapeMMAThreadBlock{}.kN
<< "_" << ShapeMMAThreadBlock{}.kK << ".csv";
std::ofstream file(fname.str());
file << "problem: " << ' ' << problem_size.m() << "x" << problem_size.n() << "x"
<< problem_size.k() << ", alpha: " << float(alpha) << ", beta: " << float(beta) << "\n\n";
file << "A =\n"
<< tensor_a.host_view() << "\nB =\n"
<< tensor_b.host_view() << "\nC =\n"
<< tensor_c.host_view() << "\n\nReference =\n"
<< tensor_ref_d.host_view() << "\n\nComputed =\n"
<< tensor_d.host_view();
}
std::cout << (result.passed ? "Passed" : "Failed") << std::endl;
if (result.passed) {
std::cout << "Runtime: " << result.runtime_ms << " ms" << std::endl;
std::cout << "GFLOPs: " << result.gflops << std::endl;
}
return (result.passed ? 0 : -1);
}
int main(int argc, const char **argv) {
Options options;
options.parse(argc, argv);
if (options.help) {
options.print_usage(std::cout) << std::endl;
return 0;
}
printf("%d x %d x %d tensor op Matrix Multiply\n",
options.problem_size.m(), options.problem_size.n(), options.problem_size.k());
if (!options.valid()) {
std::cerr << "Invalid problem." << std::endl;
return -1;
}
return run(options);
}
# Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
hytlass_example_add_executable(
gfx928_parallel_splitk_gemm
gfx928_parallel_splitk_gemm.cu
)
\ No newline at end of file
# Copyright (c) 2023 - 2025 Hygon Information Technology Co., Ltd. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
hytlass_example_add_executable(
gfx928_streamk_gemm
gfx928_streamk_gemm.cu
)
hytlass_example_add_executable(
gfx928_gemm_universal_streamk_broadcast
gfx928_gemm_universal_streamk_broadcast.cu
)
\ No newline at end of file
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment