Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel_ROCM
Commits
29f1d60d
Unverified
Commit
29f1d60d
authored
Dec 09, 2024
by
jakpiase
Committed by
GitHub
Dec 09, 2024
Browse files
Merge branch 'develop' into jakpiase/ck_tile_comp_gemm_uts
parents
7d658bbe
355893cd
Changes
9
Show whitespace changes
Inline
Side-by-side
Showing
9 changed files
with
235 additions
and
232 deletions
+235
-232
Jenkinsfile
Jenkinsfile
+98
-188
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
+3
-3
script/process_perf_data.py
script/process_perf_data.py
+2
-2
script/process_perf_data.sh
script/process_perf_data.sh
+13
-0
script/process_qa_data.sh
script/process_qa_data.sh
+12
-0
script/run_full_performance_tests.sh
script/run_full_performance_tests.sh
+1
-1
script/run_gemm_performance_tests.sh
script/run_gemm_performance_tests.sh
+41
-0
script/run_performance_tests.sh
script/run_performance_tests.sh
+6
-15
test/data_type/test_custom_type.cpp
test/data_type/test_custom_type.cpp
+59
-23
No files found.
Jenkinsfile
View file @
29f1d60d
...
@@ -330,10 +330,8 @@ def cmake_build(Map conf=[:]){
...
@@ -330,10 +330,8 @@ def cmake_build(Map conf=[:]){
try
{
try
{
archiveArtifacts
"perf_fmha_fwd_*.log"
archiveArtifacts
"perf_fmha_fwd_*.log"
archiveArtifacts
"perf_fmha_bwd_*.log"
archiveArtifacts
"perf_fmha_bwd_*.log"
stash
name:
"perf_fmha_fwd_gfx942.log"
stash
includes:
"perf_fmha_**_gfx942.log"
,
name:
"perf_fmha_log_gfx942"
stash
name:
"perf_fmha_bwd_gfx942.log"
stash
includes:
"perf_fmha_**_gfx90a.log"
,
name:
"perf_fmha_log_gfx90a"
stash
name:
"perf_fmha_fwd_gfx90a.log"
stash
name:
"perf_fmha_bwd_gfx90a.log"
}
}
catch
(
Exception
err
){
catch
(
Exception
err
){
echo
"could not locate the requested artifacts: ${err.getMessage()}. will skip the stashing."
echo
"could not locate the requested artifacts: ${err.getMessage()}. will skip the stashing."
...
@@ -408,128 +406,6 @@ def buildHipClangJobAndReboot(Map conf=[:]){
...
@@ -408,128 +406,6 @@ def buildHipClangJobAndReboot(Map conf=[:]){
}
}
}
}
def
runCKProfiler
(
Map
conf
=[:]){
show_node_info
()
env
.
HSA_ENABLE_SDMA
=
0
checkout
scm
def
image
=
getDockerImageName
()
def
prefixpath
=
conf
.
get
(
"prefixpath"
,
"/opt/rocm"
)
// Jenkins is complaining about the render group
def
dockerOpts
=
"--device=/dev/kfd --device=/dev/dri --group-add video --group-add render --cap-add=SYS_PTRACE --security-opt seccomp=unconfined"
if
(
conf
.
get
(
"enforce_xnack_on"
,
false
))
{
dockerOpts
=
dockerOpts
+
" --env HSA_XNACK=1 "
}
def
video_id
=
sh
(
returnStdout:
true
,
script:
'getent group video | cut -d: -f3'
)
def
render_id
=
sh
(
returnStdout:
true
,
script:
'getent group render | cut -d: -f3'
)
dockerOpts
=
dockerOpts
+
" --group-add=${video_id} --group-add=${render_id} "
echo
"Docker flags: ${dockerOpts}"
def
dockerArgs
=
"--build-arg PREFIX=${prefixpath} --build-arg compiler_version='${params.COMPILER_VERSION}' --build-arg compiler_commit='${params.COMPILER_COMMIT}' --build-arg ROCMVERSION='${params.ROCMVERSION}' "
def
variant
=
env
.
STAGE_NAME
def
retimage
gitStatusWrapper
(
credentialsId:
"${env.ck_git_creds}"
,
gitHubContext:
"Jenkins - ${variant}"
,
account:
'ROCm'
,
repo:
'composable_kernel'
)
{
try
{
(
retimage
,
image
)
=
getDockerImage
(
conf
)
withDockerContainer
(
image:
image
,
args:
dockerOpts
)
{
timeout
(
time:
5
,
unit:
'MINUTES'
){
sh
'rocminfo | tee rocminfo.log'
if
(
!
runShell
(
'grep -n "gfx" rocminfo.log'
)
){
throw
new
Exception
(
"GPU not found"
)
}
else
{
echo
"GPU is OK"
}
}
}
}
catch
(
org
.
jenkinsci
.
plugins
.
workflow
.
steps
.
FlowInterruptedException
e
){
echo
"The job was cancelled or aborted"
throw
e
}
withDockerContainer
(
image:
image
,
args:
dockerOpts
+
' -v=/var/jenkins/:/var/jenkins'
)
{
timeout
(
time:
24
,
unit:
'HOURS'
)
{
sh
"""
rm -rf build
mkdir build
"""
dir
(
"build"
){
unstash
'ckProfiler.tar.gz'
sh
'tar -xvf ckProfiler.tar.gz'
}
dir
(
"script"
){
if
(
params
.
RUN_FULL_QA
){
sh
"./run_full_performance_tests.sh 0 QA_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
archiveArtifacts
"perf_gemm.log"
archiveArtifacts
"perf_resnet50_N256.log"
archiveArtifacts
"perf_resnet50_N4.log"
archiveArtifacts
"perf_batched_gemm.log"
archiveArtifacts
"perf_grouped_gemm.log"
archiveArtifacts
"perf_grouped_conv_fwd.log"
archiveArtifacts
"perf_grouped_conv_bwd_data.log"
archiveArtifacts
"perf_grouped_conv_bwd_weight.log"
archiveArtifacts
"perf_gemm_bilinear.log"
archiveArtifacts
"perf_reduction.log"
archiveArtifacts
"perf_splitK_gemm.log"
archiveArtifacts
"perf_onnx_gemm.log"
archiveArtifacts
"perf_mixed_gemm.log"
// stash perf files to master
stash
name:
"perf_gemm.log"
stash
name:
"perf_resnet50_N256.log"
stash
name:
"perf_resnet50_N4.log"
stash
name:
"perf_batched_gemm.log"
stash
name:
"perf_grouped_gemm.log"
stash
name:
"perf_grouped_conv_fwd.log"
stash
name:
"perf_grouped_conv_bwd_data.log"
stash
name:
"perf_grouped_conv_bwd_weight.log"
stash
name:
"perf_gemm_bilinear.log"
stash
name:
"perf_reduction.log"
stash
name:
"perf_splitK_gemm.log"
stash
name:
"perf_onnx_gemm.log"
stash
name:
"perf_mixed_gemm.log"
//we will process results on the master node
}
else
{
sh
"./run_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
archiveArtifacts
"perf_gemm.log"
archiveArtifacts
"perf_resnet50_N256.log"
archiveArtifacts
"perf_resnet50_N4.log"
// stash perf files to master
stash
name:
"perf_gemm.log"
stash
name:
"perf_resnet50_N256.log"
stash
name:
"perf_resnet50_N4.log"
//we will process the results on the master node
}
}
}
}
}
return
retimage
}
def
runPerfTest
(
Map
conf
=[:]){
try
{
runCKProfiler
(
conf
)
}
catch
(
e
){
echo
"throwing error exception in performance tests"
echo
'Exception occurred: '
+
e
.
toString
()
throw
e
}
finally
{
if
(!
conf
.
get
(
"no_reboot"
,
false
))
{
reboot
()
}
}
}
def
Build_CK
(
Map
conf
=[:]){
def
Build_CK
(
Map
conf
=[:]){
show_node_info
()
show_node_info
()
...
@@ -589,36 +465,95 @@ def Build_CK(Map conf=[:]){
...
@@ -589,36 +465,95 @@ def Build_CK(Map conf=[:]){
throw
e
throw
e
}
}
withDockerContainer
(
image:
image
,
args:
dockerOpts
+
' -v=/var/jenkins/:/var/jenkins'
)
{
withDockerContainer
(
image:
image
,
args:
dockerOpts
+
' -v=/var/jenkins/:/var/jenkins'
)
{
timeout
(
time:
2
4
,
unit:
'HOURS'
)
timeout
(
time:
1
2
,
unit:
'HOURS'
)
{
{
//check whether to run performance tests on this node
//check whether to run performance tests on this node
def
do_perf_tests
=
0
def
arch_type
=
0
sh
'rocminfo | tee rocminfo.log'
sh
'rocminfo | tee rocminfo.log'
if
(
runShell
(
'grep -n "gfx1030" rocminfo.log'
)
||
runShell
(
'grep -n "gfx1101" rocminfo.log'
)
||
runShell
(
'grep -n "gfx1201" rocminfo.log'
)
||
runShell
(
'grep -n "gfx942" rocminfo.log'
)
){
if
(
runShell
(
'grep -n "gfx90a" rocminfo.log'
)
){
do_perf_tests
=
1
arch_type
=
1
echo
"Stash profiler and run performance tests"
}
else
if
(
runShell
(
'grep -n "gfx942" rocminfo.log'
)
)
{
arch_type
=
2
}
else
if
(
runShell
(
'grep -n "gfx1030" rocminfo.log'
)
)
{
arch_type
=
3
}
else
if
(
runShell
(
'grep -n "gfx1101" rocminfo.log'
)
)
{
arch_type
=
4
}
else
if
(
runShell
(
'grep -n "gfx1201" rocminfo.log'
)
)
{
arch_type
=
5
}
}
cmake_build
(
conf
)
cmake_build
(
conf
)
dir
(
"build"
){
dir
(
"build"
){
//run tests and examples
if
(
params
.
RUN_FULL_QA
&&
arch_type
==
1
){
//sh 'make -j check'
// build deb packages for all gfx9 targets on gfx90a system and prepare to export
if
(
params
.
RUN_PERFORMANCE_TESTS
&&
do_perf_tests
==
0
){
echo
"Build ckProfiler package"
//we only need the ckProfiler to run the performance tests, so we pack and stash it
//do not stash profiler on nodes where we don't need to run performance tests
sh
'tar -zcvf ckProfiler.tar.gz bin/ckProfiler'
stash
name:
"ckProfiler.tar.gz"
}
if
(
params
.
RUN_FULL_QA
&&
do_perf_tests
==
0
){
// build deb packages for all gfx9 targets and prepare to export
sh
'make -j package'
sh
'make -j package'
archiveArtifacts
artifacts:
'composablekernel-ckprofiler_*.deb'
archiveArtifacts
artifacts:
'composablekernel-ckprofiler_*.deb'
archiveArtifacts
artifacts:
'composablekernel-tests_*.deb'
sh
'mv composablekernel-ckprofiler_*.deb ckprofiler_0.2.0_amd64.deb'
sh
'mv composablekernel-ckprofiler_*.deb ckprofiler_0.2.0_amd64.deb'
stash
name:
"ckprofiler_0.2.0_amd64.deb"
stash
includes:
"ckprofiler_0.2.0_amd64.deb"
,
name:
"ckprofiler_0.2.0_amd64.deb"
}
}
}
}
if
(
params
.
hipTensor_test
&&
do_perf_tests
==
0
){
// run performance tests, stash the logs, results will be processed on the master node
//build and test hipTensor
dir
(
"script"
){
if
(
params
.
RUN_PERFORMANCE_TESTS
){
if
(
params
.
RUN_FULL_QA
&&
arch_type
==
1
){
// run full tests on gfx90a
echo
"Run full performance tests"
sh
"./run_full_performance_tests.sh 0 QA_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
archiveArtifacts
"perf_gemm.log"
archiveArtifacts
"perf_resnet50_N256.log"
archiveArtifacts
"perf_resnet50_N4.log"
archiveArtifacts
"perf_batched_gemm.log"
archiveArtifacts
"perf_grouped_gemm.log"
archiveArtifacts
"perf_grouped_conv_fwd.log"
archiveArtifacts
"perf_grouped_conv_bwd_data.log"
archiveArtifacts
"perf_grouped_conv_bwd_weight.log"
archiveArtifacts
"perf_gemm_bilinear.log"
archiveArtifacts
"perf_reduction.log"
archiveArtifacts
"perf_splitK_gemm.log"
archiveArtifacts
"perf_onnx_gemm.log"
archiveArtifacts
"perf_mixed_gemm.log"
stash
includes:
"perf_**.log"
,
name:
"perf_log"
}
else
if
(
arch_type
==
1
){
// run standard tests on gfx90a
echo
"Run performance tests"
sh
"./run_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME}"
archiveArtifacts
"perf_gemm.log"
archiveArtifacts
"perf_onnx_gemm.log"
archiveArtifacts
"perf_resnet50_N256.log"
archiveArtifacts
"perf_resnet50_N4.log"
stash
includes:
"perf_**.log"
,
name:
"perf_log"
}
// disable performance tests on gfx1030 for now.
//else if ( arch_type == 3){
// run basic tests on gfx1030
// echo "Run gemm performance tests"
// sh "./run_gemm_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME} gfx10"
// archiveArtifacts "perf_onnx_gemm_gfx10.log"
// stash includes: "perf_onnx_gemm_gfx10.log", name: "perf_log_gfx10"
//}
else
if
(
arch_type
==
4
){
// run basic tests on gfx11
echo
"Run gemm performance tests"
sh
"./run_gemm_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME} gfx11"
archiveArtifacts
"perf_onnx_gemm_gfx11.log"
stash
includes:
"perf_onnx_gemm_gfx11.log"
,
name:
"perf_log_gfx11"
}
else
if
(
arch_type
==
5
){
// run basic tests on gfx12
echo
"Run gemm performance tests"
sh
"./run_gemm_performance_tests.sh 0 CI_${params.COMPILER_VERSION} ${env.BRANCH_NAME} ${NODE_NAME} gfx12"
archiveArtifacts
"perf_onnx_gemm_gfx12.log"
stash
includes:
"perf_onnx_gemm_gfx12.log"
,
name:
"perf_log_gfx12"
}
}
}
if
(
params
.
hipTensor_test
&&
arch_type
==
1
){
// build and test hipTensor on gfx90a node
sh
"""#!/bin/bash
sh
"""#!/bin/bash
rm -rf "${params.hipTensor_branch}".zip
rm -rf "${params.hipTensor_branch}".zip
rm -rf hipTensor-"${params.hipTensor_branch}"
rm -rf hipTensor-"${params.hipTensor_branch}"
...
@@ -690,10 +625,8 @@ def process_results(Map conf=[:]){
...
@@ -690,10 +625,8 @@ def process_results(Map conf=[:]){
dir
(
"script"
){
dir
(
"script"
){
if
(
params
.
RUN_CK_TILE_FMHA_TESTS
){
if
(
params
.
RUN_CK_TILE_FMHA_TESTS
){
try
{
try
{
unstash
"perf_fmha_fwd_gfx942.log"
unstash
"perf_fmha_log_gfx942"
unstash
"perf_fmha_bwd_gfx942.log"
unstash
"perf_fmha_log_gfx90a"
unstash
"perf_fmha_fwd_gfx90a.log"
unstash
"perf_fmha_bwd_gfx90a.log"
}
}
catch
(
Exception
err
){
catch
(
Exception
err
){
echo
"could not locate the FMHA performance logs: ${err.getMessage()}."
echo
"could not locate the FMHA performance logs: ${err.getMessage()}."
...
@@ -703,26 +636,26 @@ def process_results(Map conf=[:]){
...
@@ -703,26 +636,26 @@ def process_results(Map conf=[:]){
// unstash perf files to master
// unstash perf files to master
unstash
"ckprofiler_0.2.0_amd64.deb"
unstash
"ckprofiler_0.2.0_amd64.deb"
sh
"sshpass -p ${env.ck_deb_pw} scp -o StrictHostKeyChecking=no ckprofiler_0.2.0_amd64.deb ${env.ck_deb_user}@${env.ck_deb_ip}:/var/www/html/composable_kernel/"
sh
"sshpass -p ${env.ck_deb_pw} scp -o StrictHostKeyChecking=no ckprofiler_0.2.0_amd64.deb ${env.ck_deb_user}@${env.ck_deb_ip}:/var/www/html/composable_kernel/"
unstash
"perf_gemm.log"
unstash
"perf_log"
unstash
"perf_resnet50_N256.log"
try
{
unstash
"perf_resnet50_N4.log"
unstash
"perf_log_gfx11"
unstash
"perf_batched_gemm.log"
unstash
"perf_log_gfx12"
unstash
"perf_grouped_gemm.log"
}
unstash
"perf_grouped_conv_fwd.log"
catch
(
Exception
err
){
unstash
"perf_grouped_conv_bwd_data.log"
echo
"could not locate the GEMM gfx11/gfx12 performance logs: ${err.getMessage()}."
unstash
"perf_grouped_conv_bwd_weight.log"
}
unstash
"perf_gemm_bilinear.log"
unstash
"perf_reduction.log"
unstash
"perf_splitK_gemm.log"
unstash
"perf_onnx_gemm.log"
unstash
"perf_mixed_gemm.log"
sh
"./process_qa_data.sh"
sh
"./process_qa_data.sh"
}
}
else
{
else
{
// unstash perf files to master
// unstash perf files to master
unstash
"perf_gemm.log"
unstash
"perf_log"
unstash
"perf_resnet50_N256.log"
try
{
unstash
"perf_resnet50_N4.log"
unstash
"perf_log_gfx11"
unstash
"perf_log_gfx12"
}
catch
(
Exception
err
){
echo
"could not locate the GEMM gfx11/gfx12 performance logs: ${err.getMessage()}."
}
sh
"./process_perf_data.sh"
sh
"./process_perf_data.sh"
}
}
}
}
...
@@ -1241,29 +1174,6 @@ pipeline {
...
@@ -1241,29 +1174,6 @@ pipeline {
}
}
}
}
}
}
stage
(
"Performance Tests"
)
{
parallel
{
stage
(
"Run ckProfiler: gfx90a"
)
{
when
{
beforeAgent
true
expression
{
params
.
RUN_PERFORMANCE_TESTS
.
toBoolean
()
&&
!
params
.
BUILD_LEGACY_OS
.
toBoolean
()
}
}
options
{
retry
(
1
)
}
agent
{
label
rocmnode
(
"gfx90a"
)}
environment
{
setup_args
=
"NO_CK_BUILD"
}
steps
{
runPerfTest
(
setup_args:
setup_args
,
config_targets:
"ckProfiler"
,
no_reboot:
true
,
build_type:
'Release'
)
cleanWs
()
}
}
}
}
stage
(
"Process Performance Test Results"
)
stage
(
"Process Performance Test Results"
)
{
{
parallel
parallel
...
...
library/src/tensor_operation_instance/gpu/mha/CMakeLists.txt
View file @
29f1d60d
...
@@ -6,7 +6,7 @@ set(CK_TILE_SRC_FOLDER ${CMAKE_SOURCE_DIR}/include/ck_tile/)
...
@@ -6,7 +6,7 @@ set(CK_TILE_SRC_FOLDER ${CMAKE_SOURCE_DIR}/include/ck_tile/)
# CK Codegen requires dataclass which is added in Python 3.7
# CK Codegen requires dataclass which is added in Python 3.7
# Python version 3.8 is required for general good practice as it is default for Ubuntu 20.04
# Python version 3.8 is required for general good practice as it is default for Ubuntu 20.04
if
(
NOT CK_USE_ALTERNATIVE_PYTHON
)
if
(
NOT CK_USE_ALTERNATIVE_PYTHON
)
find_package
(
Python
Interp 3 REQUIRED
)
find_package
(
Python
3 COMPONENTS Interpreter Development
)
else
()
else
()
message
(
"Using alternative python version"
)
message
(
"Using alternative python version"
)
set
(
EXTRA_PYTHON_PATH
)
set
(
EXTRA_PYTHON_PATH
)
...
@@ -33,7 +33,7 @@ set(FMHA_KNOWN_APIS "fwd,fwd_splitkv,fwd_appendkv,bwd")
...
@@ -33,7 +33,7 @@ set(FMHA_KNOWN_APIS "fwd,fwd_splitkv,fwd_appendkv,bwd")
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
# Note: The receipt 3 arg filters the generated backwards instances to reduce compilation time.
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
execute_process
(
execute_process
(
COMMAND
${
P
YTHON
_EXECUTABLE
}
${
FMHA_SRC_FOLDER
}
/generate.py
COMMAND
${
P
ython3
_EXECUTABLE
}
${
FMHA_SRC_FOLDER
}
/generate.py
--list_blobs
${
FMHA_CPP_FOLDER
}
/blob_list.txt
--list_blobs
${
FMHA_CPP_FOLDER
}
/blob_list.txt
--api
${
FMHA_KNOWN_APIS
}
--api
${
FMHA_KNOWN_APIS
}
--receipt 3
--receipt 3
...
@@ -50,7 +50,7 @@ endif()
...
@@ -50,7 +50,7 @@ endif()
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
# With receipt 3 set, we are generating instances for datatype == {fp16 || bfp16}, bias == {no || alibi}, deterministic == off, and dpad == dvpad.
add_custom_command
(
add_custom_command
(
OUTPUT
${
FMHA_GEN_BLOBS
}
OUTPUT
${
FMHA_GEN_BLOBS
}
COMMAND
${
P
YTHON
_EXECUTABLE
}
${
FMHA_SRC_FOLDER
}
/generate.py
COMMAND
${
P
ython3
_EXECUTABLE
}
${
FMHA_SRC_FOLDER
}
/generate.py
--output_dir
${
FMHA_CPP_FOLDER
}
--output_dir
${
FMHA_CPP_FOLDER
}
--api
${
FMHA_KNOWN_APIS
}
--api
${
FMHA_KNOWN_APIS
}
--receipt 3
--receipt 3
...
...
script/process_perf_data.py
View file @
29f1d60d
...
@@ -82,7 +82,7 @@ def parse_logfile(logfile):
...
@@ -82,7 +82,7 @@ def parse_logfile(logfile):
StrideA
=
[]
StrideA
=
[]
StrideB
=
[]
StrideB
=
[]
StrideC
=
[]
StrideC
=
[]
if
'perf_gemm
.log'
in
logfile
:
if
'perf_gemm
'
in
logfile
and
'gemm_bilinear'
not
in
logfile
:
for
line
in
open
(
logfile
):
for
line
in
open
(
logfile
):
if
'Best Perf'
in
line
:
if
'Best Perf'
in
line
:
lst
=
line
.
split
()
lst
=
line
.
split
()
...
@@ -260,7 +260,7 @@ def main():
...
@@ -260,7 +260,7 @@ def main():
conn
=
sqlEngine
.
connect
()
conn
=
sqlEngine
.
connect
()
#save gemm performance tests:
#save gemm performance tests:
if
'perf_gemm
.log'
in
filename
:
if
'perf_gemm
'
in
filename
and
'gemm_bilinear'
not
in
filename
:
#write the ck_gemm_test_params table only needed once the test set changes
#write the ck_gemm_test_params table only needed once the test set changes
#post_test_params(test_list,conn)
#post_test_params(test_list,conn)
for
i
in
range
(
1
,
len
(
results
)
+
1
):
for
i
in
range
(
1
,
len
(
results
)
+
1
):
...
...
script/process_perf_data.sh
View file @
29f1d60d
...
@@ -11,9 +11,22 @@
...
@@ -11,9 +11,22 @@
#process results
#process results
python3 process_perf_data.py perf_gemm.log
python3 process_perf_data.py perf_gemm.log
python3 process_perf_data.py perf_onnx_gemm.log
python3 process_perf_data.py perf_resnet50_N256.log
python3 process_perf_data.py perf_resnet50_N256.log
python3 process_perf_data.py perf_resnet50_N4.log
python3 process_perf_data.py perf_resnet50_N4.log
file
=
./perf_onnx_gemm_gfx10.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_onnx_gemm_gfx10.log
fi
file
=
./perf_onnx_gemm_gfx11.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_onnx_gemm_gfx11.log
fi
file
=
./perf_onnx_gemm_gfx12.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_onnx_gemm_gfx12.log
fi
file
=
./perf_fmha_fwd_gfx942.log
file
=
./perf_fmha_fwd_gfx942.log
if
[
-e
"
$file
"
]
;
then
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_fmha_fwd_gfx942.log
python3 process_perf_data.py perf_fmha_fwd_gfx942.log
...
...
script/process_qa_data.sh
View file @
29f1d60d
...
@@ -24,6 +24,18 @@ python3 process_perf_data.py perf_splitK_gemm.log
...
@@ -24,6 +24,18 @@ python3 process_perf_data.py perf_splitK_gemm.log
python3 process_perf_data.py perf_onnx_gemm.log
python3 process_perf_data.py perf_onnx_gemm.log
python3 process_perf_data.py perf_mixed_gemm.log
python3 process_perf_data.py perf_mixed_gemm.log
file
=
./perf_onnx_gemm_gfx10.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_onnx_gemm_gfx10.log
fi
file
=
./perf_onnx_gemm_gfx11.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_onnx_gemm_gfx11.log
fi
file
=
./perf_onnx_gemm_gfx12.log
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_onnx_gemm_gfx12.log
fi
file
=
./perf_fmha_fwd_gfx942.log
file
=
./perf_fmha_fwd_gfx942.log
if
[
-e
"
$file
"
]
;
then
if
[
-e
"
$file
"
]
;
then
python3 process_perf_data.py perf_fmha_fwd_gfx942.log
python3 process_perf_data.py perf_fmha_fwd_gfx942.log
...
...
script/run_full_performance_tests.sh
View file @
29f1d60d
...
@@ -5,7 +5,7 @@
...
@@ -5,7 +5,7 @@
# post your new test results to the database and compare them to the baseline
# post your new test results to the database and compare them to the baseline
# please contact Illia.Silin@amd.com for more details
# please contact Illia.Silin@amd.com for more details
#
#
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <branch name> <
node name>
# run the script as "./run_full_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>
# input arguments:
# input arguments:
# verification = 0 : do not verify result correctness on CPU
# verification = 0 : do not verify result correctness on CPU
# = 1 : verifuy correctness on CPU (may take a long time)
# = 1 : verifuy correctness on CPU (may take a long time)
...
...
script/run_gemm_performance_tests.sh
0 → 100755
View file @
29f1d60d
#!/bin/bash
#
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
# run the script as "./run_gemm_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name> <arch>
# input arguments:
# verification = 0 : do not verify result correctness on CPU
# = 1 : verify correctness on CPU (may take a long time)
# environment tag : a string describing the specifics of your test environment
# branch name : name of the branch in git repo (git status | grep -e 'On branch')
# node name : $hostname
# arch : GPU architecture, e.g. "gfx9" or "gfx1100"
#get the command line arguments:
export
verify
=
$1
echo
'Verification: '
$verify
export
env_type
=
$2
echo
'Environment type: '
$env_type
export
branch
=
$3
echo
'Branch name: '
$branch
export
host_name
=
$4
echo
'Host name: '
$host_name
export arch
=
$5
echo
'GPU architecture: '
$arch
function
print_log_header
(){
rm
-f
$1
;
echo
'On branch '
$3
&>
$1
;
echo
'Node name: '
$4
>>
$1
;
#get GPU_arch and number of compute units from rocminfo
echo
-n
"GPU_arch: "
>>
$1
;
rocminfo |
grep
"Name:"
|
grep
"gfx"
>>
$1
;
rocminfo |
grep
"Compute Unit:"
>>
$1
;
hipcc
--version
|
grep
-e
'HIP version'
>>
$1
;
echo
'Environment type: '
$2
>>
$1
;
/opt/rocm/bin/amdclang++
--version
|
grep
-e
'InstalledDir'
>>
$1
;
}
#run ONNX gemm tests
export
onnx_log
=
"perf_onnx_gemm_
$arch
.log"
print_log_header
$onnx_log
$env_type
$branch
$host_name
./profile_onnx_gemm.sh gemm 0 0
$verify
1 0 1 2>&1 |
tee
-a
$onnx_log
./profile_onnx_gemm.sh gemm 1 0
$verify
1 0 1 2>&1 |
tee
-a
$onnx_log
script/run_performance_tests.sh
View file @
29f1d60d
#!/bin/bash
#!/bin/bash
#
#
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
# in order to run this script you'd first need to build the ckProfiler executable in ../build/bin/
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <branch name> <
node name>
# run the script as "./run_performance_tests.sh <verification> <tag for your test environment> <branch name> <node name>
# input arguments:
# input arguments:
# verification = 0 : do not verify result correctness on CPU
# verification = 0 : do not verify result correctness on CPU
# = 1 : verify correctness on CPU (may take a long time)
# = 1 : verify correctness on CPU (may take a long time)
...
@@ -51,20 +51,11 @@ print_log_header $gemm_log $env_type $branch $host_name
...
@@ -51,20 +51,11 @@ print_log_header $gemm_log $env_type $branch $host_name
./profile_gemm.sh gemm 2 3
$verify
1 0 1 |
tee
-a
$gemm_log
./profile_gemm.sh gemm 2 3
$verify
1 0 1 |
tee
-a
$gemm_log
./profile_gemm.sh gemm 3 3
$verify
1 0 1 |
tee
-a
$gemm_log
./profile_gemm.sh gemm 3 3
$verify
1 0 1 |
tee
-a
$gemm_log
#run grouped_fwd fp16 tests
#run ONNX gemm tests
export
grouped_conv_fwd_log
=
"perf_grouped_conv_fwd_fp16.log"
export
onnx_log
=
"perf_onnx_gemm.log"
print_log_header
$conv_fwd_log
$env_type
$branch
$host_name
print_log_header
$onnx_log
$env_type
$branch
$host_name
./profile_grouped_conv_fwd.sh grouped_conv_fwd 1 1 0
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_fwd_log
./profile_onnx_gemm.sh gemm 0 0
$verify
1 0 1 2>&1 |
tee
-a
$onnx_log
./profile_onnx_gemm.sh gemm 1 0
$verify
1 0 1 2>&1 |
tee
-a
$onnx_log
#run grouped_bwd_data fp16 tests
export
grouped_conv_bwd_data_log
=
"perf_grouped_conv_bwd_data_fp16.log"
print_log_header
$grouped_conv_bwd_data_log
$env_type
$branch
$host_name
./profile_grouped_conv_bwd_data.sh grouped_conv_bwd_data 1 1
$verify
1 0 1 256 2>&1 |
tee
-a
$grouped_conv_bwd_data_log
#run grouped_bwd_weight fp16 tests
export
grouped_conv_bwd_weight_log
=
"perf_grouped_conv_bwd_weight_fp16.log"
print_log_header
$grouped_conv_bwd_weight_log
$env_type
$branch
$host_name
./profile_grouped_conv_bwd_weight.sh grouped_conv_bwd_weight 1 1
$verify
1 0 1 256 1 2>&1 |
tee
-a
$grouped_conv_bwd_weight_log
#run resnet50 tests
#run resnet50 tests
export
resnet256_log
=
"perf_resnet50_N256.log"
export
resnet256_log
=
"perf_resnet50_N256.log"
...
...
test/data_type/test_custom_type.cpp
View file @
29f1d60d
...
@@ -51,8 +51,11 @@ TEST(Custom_bool, TestAsType)
...
@@ -51,8 +51,11 @@ TEST(Custom_bool, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_bool_t
>()(
Number
<
i
>
{})
=
custom_bool_t
{
test_vec
.
at
(
i
)};
right_vec
.
template
AsType
<
custom_bool_t
>()(
Number
<
i
>
{})
=
custom_bool_t
{
test_vec
.
at
(
i
)};
});
});
// copy the vector
vector_type
<
custom_bool_t
,
size
>
left_vec
;
vector_type
<
custom_bool_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_bool_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bool_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bool_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
@@ -129,8 +132,11 @@ TEST(Custom_int8, TestAsType)
...
@@ -129,8 +132,11 @@ TEST(Custom_int8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_int8_t
>()(
Number
<
i
>
{})
=
custom_int8_t
{
test_vec
.
at
(
i
)};
right_vec
.
template
AsType
<
custom_int8_t
>()(
Number
<
i
>
{})
=
custom_int8_t
{
test_vec
.
at
(
i
)};
});
});
// copy the vector
vector_type
<
custom_int8_t
,
size
>
left_vec
;
vector_type
<
custom_int8_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_int8_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_int8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_int8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
@@ -207,8 +213,11 @@ TEST(Custom_uint8, TestAsType)
...
@@ -207,8 +213,11 @@ TEST(Custom_uint8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_uint8_t
>()(
Number
<
i
>
{})
=
custom_uint8_t
{
test_vec
.
at
(
i
)};
right_vec
.
template
AsType
<
custom_uint8_t
>()(
Number
<
i
>
{})
=
custom_uint8_t
{
test_vec
.
at
(
i
)};
});
});
// copy the vector
vector_type
<
custom_uint8_t
,
size
>
left_vec
;
vector_type
<
custom_uint8_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_uint8_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_uint8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_uint8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
@@ -287,8 +296,11 @@ TEST(Custom_f8, TestAsType)
...
@@ -287,8 +296,11 @@ TEST(Custom_f8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_f8_t
>()(
Number
<
i
>
{})
=
custom_f8_t
{
test_vec
.
at
(
i
)};
right_vec
.
template
AsType
<
custom_f8_t
>()(
Number
<
i
>
{})
=
custom_f8_t
{
test_vec
.
at
(
i
)};
});
});
// copy the vector
vector_type
<
custom_f8_t
,
size
>
left_vec
;
vector_type
<
custom_f8_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_f8_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_f8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_f8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
@@ -369,8 +381,11 @@ TEST(Custom_bf8, TestAsType)
...
@@ -369,8 +381,11 @@ TEST(Custom_bf8, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_bf8_t
>()(
Number
<
i
>
{})
=
custom_bf8_t
{
test_vec
.
at
(
i
)};
right_vec
.
template
AsType
<
custom_bf8_t
>()(
Number
<
i
>
{})
=
custom_bf8_t
{
test_vec
.
at
(
i
)};
});
});
// copy the vector
vector_type
<
custom_bf8_t
,
size
>
left_vec
;
vector_type
<
custom_bf8_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_bf8_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bf8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bf8_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
@@ -450,8 +465,11 @@ TEST(Custom_half, TestAsType)
...
@@ -450,8 +465,11 @@ TEST(Custom_half, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_half_t
>()(
Number
<
i
>
{})
=
custom_half_t
{
test_vec
.
at
(
i
)};
right_vec
.
template
AsType
<
custom_half_t
>()(
Number
<
i
>
{})
=
custom_half_t
{
test_vec
.
at
(
i
)};
});
});
// copy the vector
vector_type
<
custom_half_t
,
size
>
left_vec
;
vector_type
<
custom_half_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_half_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_half_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_half_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
@@ -533,8 +551,11 @@ TEST(Custom_bhalf, TestAsType)
...
@@ -533,8 +551,11 @@ TEST(Custom_bhalf, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_bhalf_t
>()(
Number
<
i
>
{})
=
custom_bhalf_t
{
test_vec
.
at
(
i
)};
right_vec
.
template
AsType
<
custom_bhalf_t
>()(
Number
<
i
>
{})
=
custom_bhalf_t
{
test_vec
.
at
(
i
)};
});
});
// copy the vector
vector_type
<
custom_bhalf_t
,
size
>
left_vec
;
vector_type
<
custom_bhalf_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_bhalf_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bhalf_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_bhalf_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
@@ -615,8 +636,11 @@ TEST(Custom_float, TestAsType)
...
@@ -615,8 +636,11 @@ TEST(Custom_float, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_float_t
>()(
Number
<
i
>
{})
=
custom_float_t
{
test_vec
.
at
(
i
)};
right_vec
.
template
AsType
<
custom_float_t
>()(
Number
<
i
>
{})
=
custom_float_t
{
test_vec
.
at
(
i
)};
});
});
// copy the vector
vector_type
<
custom_float_t
,
size
>
left_vec
;
vector_type
<
custom_float_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_float_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_float_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_float_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
@@ -693,8 +717,11 @@ TEST(Custom_double, TestAsType)
...
@@ -693,8 +717,11 @@ TEST(Custom_double, TestAsType)
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
right_vec
.
template
AsType
<
custom_double_t
>()(
Number
<
i
>
{})
=
custom_double_t
{
test_vec
.
at
(
i
)};
right_vec
.
template
AsType
<
custom_double_t
>()(
Number
<
i
>
{})
=
custom_double_t
{
test_vec
.
at
(
i
)};
});
});
// copy the vector
vector_type
<
custom_double_t
,
size
>
left_vec
;
vector_type
<
custom_double_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
custom_double_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_double_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
ASSERT_EQ
(
left_vec
.
template
AsType
<
custom_double_t
>()(
Number
<
i
>
{}).
data
,
test_vec
.
at
(
i
));
...
@@ -813,8 +840,11 @@ TEST(Complex_half, TestAsType)
...
@@ -813,8 +840,11 @@ TEST(Complex_half, TestAsType)
right_vec
.
template
AsType
<
complex_half_t
>()(
Number
<
i
>
{})
=
right_vec
.
template
AsType
<
complex_half_t
>()(
Number
<
i
>
{})
=
complex_half_t
{
test_vec
.
at
(
num_elem
*
i
),
test_vec
.
at
(
num_elem
*
i
+
1
)};
complex_half_t
{
test_vec
.
at
(
num_elem
*
i
),
test_vec
.
at
(
num_elem
*
i
+
1
)};
});
});
// copy the vector
vector_type
<
complex_half_t
,
size
>
left_vec
;
vector_type
<
complex_half_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
complex_half_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ASSERT_EQ
(
left_vec
.
template
AsType
<
complex_half_t
>()(
Number
<
i
>
{}).
real
,
ASSERT_EQ
(
left_vec
.
template
AsType
<
complex_half_t
>()(
Number
<
i
>
{}).
real
,
...
@@ -907,8 +937,11 @@ TEST(FP8OCP, TestAsType)
...
@@ -907,8 +937,11 @@ TEST(FP8OCP, TestAsType)
right_vec
.
template
AsType
<
f8_t
>()(
Number
<
i
>
{})
=
ck
::
type_convert
<
f8_t
>
(
test_vec
.
at
(
i
));
right_vec
.
template
AsType
<
f8_t
>()(
Number
<
i
>
{})
=
ck
::
type_convert
<
f8_t
>
(
test_vec
.
at
(
i
));
});
});
// copy the vector
vector_type
<
f8_t
,
size
>
left_vec
;
vector_type
<
f8_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
f8_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
...
@@ -984,8 +1017,11 @@ TEST(BF8OCP, TestAsType)
...
@@ -984,8 +1017,11 @@ TEST(BF8OCP, TestAsType)
right_vec
.
template
AsType
<
bf8_t
>()(
Number
<
i
>
{})
=
ck
::
type_convert
<
bf8_t
>
(
test_vec
.
at
(
i
));
right_vec
.
template
AsType
<
bf8_t
>()(
Number
<
i
>
{})
=
ck
::
type_convert
<
bf8_t
>
(
test_vec
.
at
(
i
));
});
});
// copy the vector
vector_type
<
bf8_t
,
size
>
left_vec
{
right_vec
};
vector_type
<
bf8_t
,
size
>
left_vec
{
right_vec
};
// check copy assignment op
left_vec
=
right_vec
;
// overwrite right_vec with 0s
right_vec
=
vector_type
<
bf8_t
,
size
>
{};
// check if values were copied correctly
// check if values were copied correctly
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
ck
::
static_for
<
0
,
size
,
1
>
{}([
&
](
auto
i
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment