Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
8ebc0d8c
Commit
8ebc0d8c
authored
Oct 16, 2023
by
illsilin
Browse files
replace ccache with sccache, pin package versions
parent
c99323be
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
39 additions
and
18 deletions
+39
-18
CMakeLists.txt
CMakeLists.txt
+3
-10
Dockerfile
Dockerfile
+14
-5
Jenkinsfile
Jenkinsfile
+1
-1
README.md
README.md
+21
-2
No files found.
CMakeLists.txt
View file @
8ebc0d8c
...
...
@@ -373,9 +373,10 @@ include_directories(BEFORE
SET
(
BUILD_DEV ON CACHE BOOL
"BUILD_DEV"
)
if
(
BUILD_DEV
)
add_compile_options
(
-Werror
)
add_compile_options
(
-Weverything
)
add_compile_options
(
-Werror -Weverything
)
endif
()
#add flags to reduce the size of binaries
add_compile_options
(
-Oz -flto=thin
)
message
(
"CMAKE_CXX_FLAGS:
${
CMAKE_CXX_FLAGS
}
"
)
add_custom_target
(
check COMMAND
${
CMAKE_CTEST_COMMAND
}
--output-on-failure -C
${
CMAKE_CFG_INTDIR
}
)
...
...
@@ -390,35 +391,27 @@ IF(IS_DIRECTORY "${PROJECT_SOURCE_DIR}/library/src/tensor_operation_instance/gpu
file
(
READ
"
${
PROJECT_SOURCE_DIR
}
/library/src/tensor_operation_instance/gpu/
${
subdir_path
}
/CMakeLists.txt"
cmake_instance
)
set
(
add_inst 0
)
if
((
"
${
cmake_instance
}
"
MATCHES
"fp8"
OR
"
${
cmake_instance
}
"
MATCHES
"_f8"
)
AND DTYPES MATCHES
"fp8"
)
#message("fp8 instance found!")
set
(
add_inst 1
)
endif
()
if
((
"
${
cmake_instance
}
"
MATCHES
"bf8"
OR
"
${
cmake_instance
}
"
MATCHES
"_b8"
)
AND DTYPES MATCHES
"bf8"
)
#message("bf8 instance found!")
set
(
add_inst 1
)
endif
()
if
((
"
${
cmake_instance
}
"
MATCHES
"fp16"
OR
"
${
cmake_instance
}
"
MATCHES
"_f16"
)
AND DTYPES MATCHES
"fp16"
)
#message("fp16 instance found!")
set
(
add_inst 1
)
endif
()
if
((
"
${
cmake_instance
}
"
MATCHES
"fp32"
OR
"
${
cmake_instance
}
"
MATCHES
"_f32"
)
AND DTYPES MATCHES
"fp32"
)
#message("fp32 instance found!")
set
(
add_inst 1
)
endif
()
if
((
"
${
cmake_instance
}
"
MATCHES
"fp64"
OR
"
${
cmake_instance
}
"
MATCHES
"_f64"
)
AND DTYPES MATCHES
"fp64"
)
#message("fp64 instance found!")
set
(
add_inst 1
)
endif
()
if
((
"
${
cmake_instance
}
"
MATCHES
"bf16"
OR
"
${
cmake_instance
}
"
MATCHES
"_b16"
)
AND DTYPES MATCHES
"bf16"
)
#message("bf16 instance found!")
set
(
add_inst 1
)
endif
()
if
((
"
${
cmake_instance
}
"
MATCHES
"int8"
OR
"
${
cmake_instance
}
"
MATCHES
"_i8"
)
AND DTYPES MATCHES
"int8"
)
#message("int8 instance found!")
set
(
add_inst 1
)
endif
()
if
(
NOT
"
${
cmake_instance
}
"
MATCHES
"DTYPES"
)
#message("instance should be built for all types!")
set
(
add_inst 1
)
endif
()
if
(
add_inst EQUAL 1 OR NOT DEFINED DTYPES
)
...
...
Dockerfile
View file @
8ebc0d8c
...
...
@@ -26,10 +26,17 @@ RUN wget -qO - http://repo.radeon.com/rocm/rocm.gpg.key | apt-key add - && \
RUN
sh
-c
"echo deb http://mirrors.kernel.org/ubuntu focal main universe | tee -a /etc/apt/sources.list"
RUN
amdgpu-install
-y
--usecase
=
rocm
--no-dkms
## Sccache binary built from source for ROCm
ARG
SCCACHE_REPO_URL=http://compute-artifactory.amd.com/artifactory/rocm-generic-experimental/rocm-sccache
ENV
SCCACHE_INSTALL_LOCATION=/usr/local/.cargo/bin
RUN
mkdir
-p
${
SCCACHE_INSTALL_LOCATION
}
&&
\
curl
${
SCCACHE_REPO_URL
}
/portable/0.2.16/sccache-0.2.16-alpha.1-rocm
--output
${
SCCACHE_INSTALL_LOCATION
}
/sccache
&&
\
chmod
+x
${
SCCACHE_INSTALL_LOCATION
}
/sccache
ENV
PATH=$PATH:${SCCACHE_INSTALL_LOCATION}
# Install dependencies
RUN
apt-get update
&&
DEBIAN_FRONTEND
=
noninteractive apt-get
install
-y
--allow-unauthenticated
\
build-essential
\
ccache
\
cmake
\
git
\
hip-rocclr
\
...
...
@@ -61,7 +68,7 @@ RUN gunzip /usr/local/bin/ninja.gz
RUN
chmod
a+x /usr/local/bin/ninja
RUN
git clone https://github.com/nico/ninjatracing.git
# Update the cmake to the latest version
RUN
pip
install
--upgrade
cmake
RUN
pip
install
--upgrade
cmake
=
3.27.5
# Setup ubsan environment to printstacktrace
RUN
ln
-s
/usr/bin/llvm-symbolizer-3.8 /usr/local/bin/llvm-symbolizer
...
...
@@ -75,10 +82,10 @@ ARG PREFIX=/opt/rocm
# Install packages for processing the performance results
RUN
pip3
install
--upgrade
pip
RUN
pip3
install
sqlalchemy
==
1.4.46
RUN
pip3
install
pymysql
RUN
pip3
install
pandas
RUN
pip3
install
pymysql
=
1.4.6
RUN
pip3
install
pandas
=
2.0.3
RUN
pip3
install
setuptools-rust
RUN
pip3
install
sshtunnel
RUN
pip3
install
sshtunnel
=
0.4.0
# Setup ubsan environment to printstacktrace
ENV
UBSAN_OPTIONS=print_stacktrace=1
...
...
@@ -114,6 +121,8 @@ RUN if [ "$compiler_version" = "amd-stg-open" ] && [ "$compiler_commit" != "" ];
else
echo
"using the release compiler"
;
\
fi
#clean-up the deb package
RUN
sh
-c
"rm -rf amdgpu-install*"
#ENV HIP_CLANG_PATH='/llvm-project/build/bin'
#RUN sh -c "echo HIP_CLANG_PATH = '$HIP_CLANG_PATH'"
Jenkinsfile
View file @
8ebc0d8c
...
...
@@ -221,7 +221,7 @@ def cmake_build(Map conf=[:]){
}
if
(
env
.
CK_CCACHE
)
{
setup_args
=
" -DCMAKE_CXX_COMPILER_LAUNCHER=
'
ccache
'
-DCMAKE_C_COMPILER_LAUNCHER=
'
ccache
'
"
+
setup_args
setup_args
=
" -DCMAKE_CXX_COMPILER_LAUNCHER=
s
ccache -DCMAKE_C_COMPILER_LAUNCHER=
s
ccache "
+
setup_args
}
echo
"ccache server: ${env.CK_CCACHE}"
...
...
README.md
View file @
8ebc0d8c
...
...
@@ -86,7 +86,6 @@ cmake
If GPU_TARGETS is not set on the cmake command line, CK will be built for all targets supported by the
current compiler.
Additional cmake flags can be used to significantly speed-up the build:
INSTANCES_ONLY (by default is OFF) must be set to ON in order to build only the instances and library
...
...
@@ -98,11 +97,31 @@ of select data types only. Currently, building of int8 instances is taking a lot
DL_KERNELS (by default is OFF) must be set to ON in order to build the gemm_dl and batched_gemm_multi_d_dl
instances. Those instances are only needed for the NAVI2x platforms.
### Using sccache for building
The default CK Docker images come with a pre-installed version of sccache that supports clang being used as the HIP compiler
("-x hip"). Using sccache can help reduce the time to rebuild the code from hours to 1-2 minutes. To
invoke sccache, you need to run:
```
bash
sccache
--start-server
```
and add the following flags to the cmake command line:
```
bash
-DCMAKE_CXX_COMPILER_LAUNCHER
=
sccache
-DCMAKE_C_COMPILER_LAUNCHER
=
sccache
```
### Build examples and tests
```
bash
make
-j
examples tests
make
test
```
### Build and run all examples and tests
```
bash
make
-j
check
```
Instructions for running each individual example are under
[
example
](
/example
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment