Commit 8ebc0d8c authored by illsilin's avatar illsilin
Browse files

replace ccache with sccache, pin package versions

parent c99323be
...@@ -373,9 +373,10 @@ include_directories(BEFORE ...@@ -373,9 +373,10 @@ include_directories(BEFORE
SET(BUILD_DEV ON CACHE BOOL "BUILD_DEV") SET(BUILD_DEV ON CACHE BOOL "BUILD_DEV")
if(BUILD_DEV) if(BUILD_DEV)
add_compile_options(-Werror) add_compile_options(-Werror -Weverything)
add_compile_options(-Weverything)
endif() endif()
#add flags to reduce the size of binaries
add_compile_options(-Oz -flto=thin)
message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}") message("CMAKE_CXX_FLAGS: ${CMAKE_CXX_FLAGS}")
add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR}) add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND} --output-on-failure -C ${CMAKE_CFG_INTDIR})
...@@ -390,35 +391,27 @@ IF(IS_DIRECTORY "${PROJECT_SOURCE_DIR}/library/src/tensor_operation_instance/gpu ...@@ -390,35 +391,27 @@ IF(IS_DIRECTORY "${PROJECT_SOURCE_DIR}/library/src/tensor_operation_instance/gpu
file(READ "${PROJECT_SOURCE_DIR}/library/src/tensor_operation_instance/gpu/${subdir_path}/CMakeLists.txt" cmake_instance) file(READ "${PROJECT_SOURCE_DIR}/library/src/tensor_operation_instance/gpu/${subdir_path}/CMakeLists.txt" cmake_instance)
set(add_inst 0) set(add_inst 0)
if(("${cmake_instance}" MATCHES "fp8" OR "${cmake_instance}" MATCHES "_f8") AND DTYPES MATCHES "fp8") if(("${cmake_instance}" MATCHES "fp8" OR "${cmake_instance}" MATCHES "_f8") AND DTYPES MATCHES "fp8")
#message("fp8 instance found!")
set(add_inst 1) set(add_inst 1)
endif() endif()
if(("${cmake_instance}" MATCHES "bf8" OR "${cmake_instance}" MATCHES "_b8") AND DTYPES MATCHES "bf8") if(("${cmake_instance}" MATCHES "bf8" OR "${cmake_instance}" MATCHES "_b8") AND DTYPES MATCHES "bf8")
#message("bf8 instance found!")
set(add_inst 1) set(add_inst 1)
endif() endif()
if(("${cmake_instance}" MATCHES "fp16" OR "${cmake_instance}" MATCHES "_f16") AND DTYPES MATCHES "fp16") if(("${cmake_instance}" MATCHES "fp16" OR "${cmake_instance}" MATCHES "_f16") AND DTYPES MATCHES "fp16")
#message("fp16 instance found!")
set(add_inst 1) set(add_inst 1)
endif() endif()
if(("${cmake_instance}" MATCHES "fp32" OR "${cmake_instance}" MATCHES "_f32") AND DTYPES MATCHES "fp32") if(("${cmake_instance}" MATCHES "fp32" OR "${cmake_instance}" MATCHES "_f32") AND DTYPES MATCHES "fp32")
#message("fp32 instance found!")
set(add_inst 1) set(add_inst 1)
endif() endif()
if(("${cmake_instance}" MATCHES "fp64" OR "${cmake_instance}" MATCHES "_f64") AND DTYPES MATCHES "fp64") if(("${cmake_instance}" MATCHES "fp64" OR "${cmake_instance}" MATCHES "_f64") AND DTYPES MATCHES "fp64")
#message("fp64 instance found!")
set(add_inst 1) set(add_inst 1)
endif() endif()
if(("${cmake_instance}" MATCHES "bf16" OR "${cmake_instance}" MATCHES "_b16") AND DTYPES MATCHES "bf16") if(("${cmake_instance}" MATCHES "bf16" OR "${cmake_instance}" MATCHES "_b16") AND DTYPES MATCHES "bf16")
#message("bf16 instance found!")
set(add_inst 1) set(add_inst 1)
endif() endif()
if(("${cmake_instance}" MATCHES "int8" OR "${cmake_instance}" MATCHES "_i8") AND DTYPES MATCHES "int8") if(("${cmake_instance}" MATCHES "int8" OR "${cmake_instance}" MATCHES "_i8") AND DTYPES MATCHES "int8")
#message("int8 instance found!")
set(add_inst 1) set(add_inst 1)
endif() endif()
if(NOT "${cmake_instance}" MATCHES "DTYPES") if(NOT "${cmake_instance}" MATCHES "DTYPES")
#message("instance should be built for all types!")
set(add_inst 1) set(add_inst 1)
endif() endif()
if(add_inst EQUAL 1 OR NOT DEFINED DTYPES) if(add_inst EQUAL 1 OR NOT DEFINED DTYPES)
......
...@@ -26,10 +26,17 @@ RUN wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \ ...@@ -26,10 +26,17 @@ RUN wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \
RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list" RUN sh -c "echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
RUN amdgpu-install -y --usecase=rocm --no-dkms RUN amdgpu-install -y --usecase=rocm --no-dkms
## Sccache binary built from source for ROCm
ARG SCCACHE_REPO_URL=http://compute-artifactory.amd.com/artifactory/rocm-generic-experimental/rocm-sccache
ENV SCCACHE_INSTALL_LOCATION=/usr/local/.cargo/bin
RUN mkdir -p ${SCCACHE_INSTALL_LOCATION} && \
curl ${SCCACHE_REPO_URL}/portable/0.2.16/sccache-0.2.16-alpha.1-rocm --output ${SCCACHE_INSTALL_LOCATION}/sccache && \
chmod +x ${SCCACHE_INSTALL_LOCATION}/sccache
ENV PATH=$PATH:${SCCACHE_INSTALL_LOCATION}
# Install dependencies # Install dependencies
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-unauthenticated \
build-essential \ build-essential \
ccache \
cmake \ cmake \
git \ git \
hip-rocclr \ hip-rocclr \
...@@ -61,7 +68,7 @@ RUN gunzip /usr/local/bin/ninja.gz ...@@ -61,7 +68,7 @@ RUN gunzip /usr/local/bin/ninja.gz
RUN chmod a+x /usr/local/bin/ninja RUN chmod a+x /usr/local/bin/ninja
RUN git clone https://github.com/nico/ninjatracing.git RUN git clone https://github.com/nico/ninjatracing.git
# Update the cmake to the latest version # Update the cmake to the latest version
RUN pip install --upgrade cmake RUN pip install --upgrade cmake=3.27.5
# Setup ubsan environment to printstacktrace # Setup ubsan environment to printstacktrace
RUN ln -s /usr/bin/llvm-symbolizer-3.8 /usr/local/bin/llvm-symbolizer RUN ln -s /usr/bin/llvm-symbolizer-3.8 /usr/local/bin/llvm-symbolizer
...@@ -75,10 +82,10 @@ ARG PREFIX=/opt/rocm ...@@ -75,10 +82,10 @@ ARG PREFIX=/opt/rocm
# Install packages for processing the performance results # Install packages for processing the performance results
RUN pip3 install --upgrade pip RUN pip3 install --upgrade pip
RUN pip3 install sqlalchemy==1.4.46 RUN pip3 install sqlalchemy==1.4.46
RUN pip3 install pymysql RUN pip3 install pymysql=1.4.6
RUN pip3 install pandas RUN pip3 install pandas=2.0.3
RUN pip3 install setuptools-rust RUN pip3 install setuptools-rust
RUN pip3 install sshtunnel RUN pip3 install sshtunnel=0.4.0
# Setup ubsan environment to printstacktrace # Setup ubsan environment to printstacktrace
ENV UBSAN_OPTIONS=print_stacktrace=1 ENV UBSAN_OPTIONS=print_stacktrace=1
...@@ -114,6 +121,8 @@ RUN if [ "$compiler_version" = "amd-stg-open" ] && [ "$compiler_commit" != "" ]; ...@@ -114,6 +121,8 @@ RUN if [ "$compiler_version" = "amd-stg-open" ] && [ "$compiler_commit" != "" ];
else echo "using the release compiler"; \ else echo "using the release compiler"; \
fi fi
#clean-up the deb package
RUN sh -c "rm -rf amdgpu-install*"
#ENV HIP_CLANG_PATH='/llvm-project/build/bin' #ENV HIP_CLANG_PATH='/llvm-project/build/bin'
#RUN sh -c "echo HIP_CLANG_PATH = '$HIP_CLANG_PATH'" #RUN sh -c "echo HIP_CLANG_PATH = '$HIP_CLANG_PATH'"
...@@ -221,7 +221,7 @@ def cmake_build(Map conf=[:]){ ...@@ -221,7 +221,7 @@ def cmake_build(Map conf=[:]){
} }
if(env.CK_CCACHE) if(env.CK_CCACHE)
{ {
setup_args = " -DCMAKE_CXX_COMPILER_LAUNCHER='ccache' -DCMAKE_C_COMPILER_LAUNCHER='ccache' " + setup_args setup_args = " -DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache " + setup_args
} }
echo "ccache server: ${env.CK_CCACHE}" echo "ccache server: ${env.CK_CCACHE}"
......
...@@ -86,7 +86,6 @@ cmake ...@@ -86,7 +86,6 @@ cmake
If GPU_TARGETS is not set on the cmake command line, CK will be built for all targets supported by the If GPU_TARGETS is not set on the cmake command line, CK will be built for all targets supported by the
current compiler. current compiler.
Additional cmake flags can be used to significantly speed-up the build: Additional cmake flags can be used to significantly speed-up the build:
INSTANCES_ONLY (by default is OFF) must be set to ON in order to build only the instances and library INSTANCES_ONLY (by default is OFF) must be set to ON in order to build only the instances and library
...@@ -98,11 +97,31 @@ of select data types only. Currently, building of int8 instances is taking a lot ...@@ -98,11 +97,31 @@ of select data types only. Currently, building of int8 instances is taking a lot
DL_KERNELS (by default is OFF) must be set to ON in order to build the gemm_dl and batched_gemm_multi_d_dl DL_KERNELS (by default is OFF) must be set to ON in order to build the gemm_dl and batched_gemm_multi_d_dl
instances. Those instances are only needed for the NAVI2x platforms. instances. Those instances are only needed for the NAVI2x platforms.
### Using sccache for building
The default CK dockers come with pre-installed version of sccache which supports clang being used as hip-compiler
" -x hip". Using sccache can help reduce the time to re-build the code from hours to 1 - 2 minutes. In order to
invoke sccache, you need to run
```bash
sccache --start-server
```
and add the following flags to the cmake command line:
```bash
-DCMAKE_CXX_COMPILER_LAUNCHER=sccache -DCMAKE_C_COMPILER_LAUNCHER=sccache
```
### Build examples and tests ### Build examples and tests
```bash ```bash
make -j examples tests make -j examples tests
make test ```
### Build and run all examples and tests
```bash
make -j check
``` ```
Instructions for running each individual examples are under [example](/example) Instructions for running each individual examples are under [example](/example)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment