Unverified Commit 984b3722 authored by Illia Silin's avatar Illia Silin Committed by GitHub
Browse files

Run CI on MI100 nodes only, run daily QA on MI200 nodes. (#339)



* turn on full qa only on gfx90a, use int initialization

* change script syntax

* update script parsing clinfo, throw exception if 0 devices

* fix syntax

* try using toBoolean for the QA conditions

* run regular CI on MI100 only, use MI200 only for daily QA

* evaluate when conditions before agent

* launch QA on develop branch and update profile_reduce script

* update test script

* update script

* remove false dependency from dockerfile

* try removing rbuild completely
Co-authored-by: default avatarChao Liu <chao.liu2@amd.com>
Co-authored-by: default avatarChao Liu <lc.roy86@gmail.com>
parent 500fa995
...@@ -24,8 +24,8 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow- ...@@ -24,8 +24,8 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --allow-
cmake-data=3.15.1-0kitware1 \ cmake-data=3.15.1-0kitware1 \
cmake=3.15.1-0kitware1 \ cmake=3.15.1-0kitware1 \
curl \ curl \
g++ \ # g++ \
gdb \ # gdb \
git \ git \
hip-rocclr \ hip-rocclr \
jq \ jq \
...@@ -63,16 +63,16 @@ RUN wget https://github.com/Yelp/dumb-init/releases/download/v1.2.0/dumb-init_1. ...@@ -63,16 +63,16 @@ RUN wget https://github.com/Yelp/dumb-init/releases/download/v1.2.0/dumb-init_1.
RUN dpkg -i dumb-init_*.deb && rm dumb-init_*.deb RUN dpkg -i dumb-init_*.deb && rm dumb-init_*.deb
# Install cget # Install cget
RUN pip install cget #RUN pip install cget
# Install rclone # Install rclone
RUN pip install https://github.com/pfultz2/rclone/archive/master.tar.gz #RUN pip install https://github.com/pfultz2/rclone/archive/master.tar.gz
ARG PREFIX=/opt/rocm ARG PREFIX=/opt/rocm
# Install dependencies # Install dependencies
RUN cget install pfultz2/rocm-recipes #RUN cget install pfultz2/rocm-recipes
# Install rbuild # Install rbuild
RUN pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/6d78a0553babdaea8d2da5de15cbda7e869594b8.tar.gz #RUN pip3 install https://github.com/RadeonOpenCompute/rbuild/archive/6d78a0553babdaea8d2da5de15cbda7e869594b8.tar.gz
# Install packages for processing the performance results # Install packages for processing the performance results
RUN pip3 install --upgrade pip RUN pip3 install --upgrade pip
RUN pip3 install sqlalchemy RUN pip3 install sqlalchemy
...@@ -85,9 +85,9 @@ ENV UBSAN_OPTIONS=print_stacktrace=1 ...@@ -85,9 +85,9 @@ ENV UBSAN_OPTIONS=print_stacktrace=1
ENV LC_ALL=C.UTF-8 ENV LC_ALL=C.UTF-8
ENV LANG=C.UTF-8 ENV LANG=C.UTF-8
ADD rbuild.ini /rbuild.ini #ADD rbuild.ini /rbuild.ini
ADD dev-requirements.txt dev-requirements.txt ADD dev-requirements.txt dev-requirements.txt
RUN rbuild prepare -s develop -d $PREFIX #RUN rbuild prepare -s develop -d $PREFIX
RUN groupadd -f render RUN groupadd -f render
# Install the new rocm-cmake version # Install the new rocm-cmake version
......
...@@ -12,8 +12,9 @@ def show_node_info() { ...@@ -12,8 +12,9 @@ def show_node_info() {
} }
def runShell(String command){ def runShell(String command){
def responseCode = sh returnStatus: true, script: "${command} &> tmp.txt" def responseCode = sh returnStatus: true, script: "${command} > tmp.txt"
def output = readFile(file: "tmp.txt") def output = readFile(file: "tmp.txt")
echo "tmp.txt contents: $output"
return (output != "") return (output != "")
} }
...@@ -121,8 +122,7 @@ def buildHipClangJob(Map conf=[:]){ ...@@ -121,8 +122,7 @@ def buildHipClangJob(Map conf=[:]){
timeout(time: 5, unit: 'MINUTES'){ timeout(time: 5, unit: 'MINUTES'){
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log' sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
echo "GPU not found" throw new Exception ("GPU not found")
throw e
} }
else{ else{
echo "GPU is OK" echo "GPU is OK"
...@@ -140,8 +140,7 @@ def buildHipClangJob(Map conf=[:]){ ...@@ -140,8 +140,7 @@ def buildHipClangJob(Map conf=[:]){
timeout(time: 5, unit: 'MINUTES'){ timeout(time: 5, unit: 'MINUTES'){
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo |tee clinfo.log' sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo |tee clinfo.log'
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
echo "GPU not found" throw new Exception ("GPU not found")
throw e
} }
else{ else{
echo "GPU is OK" echo "GPU is OK"
...@@ -153,14 +152,6 @@ def buildHipClangJob(Map conf=[:]){ ...@@ -153,14 +152,6 @@ def buildHipClangJob(Map conf=[:]){
withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') { withDockerContainer(image: image, args: dockerOpts + ' -v=/var/jenkins/:/var/jenkins') {
timeout(time: 5, unit: 'HOURS') timeout(time: 5, unit: 'HOURS')
{ {
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
echo "GPU not found"
throw e
}
else{
echo "GPU is OK"
}
cmake_build(conf) cmake_build(conf)
} }
} }
...@@ -223,8 +214,7 @@ def runCKProfiler(Map conf=[:]){ ...@@ -223,8 +214,7 @@ def runCKProfiler(Map conf=[:]){
timeout(time: 5, unit: 'MINUTES'){ timeout(time: 5, unit: 'MINUTES'){
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log' sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
echo "GPU not found" throw new Exception ("GPU not found")
throw e
} }
else{ else{
echo "GPU is OK" echo "GPU is OK"
...@@ -242,8 +232,7 @@ def runCKProfiler(Map conf=[:]){ ...@@ -242,8 +232,7 @@ def runCKProfiler(Map conf=[:]){
timeout(time: 5, unit: 'MINUTES'){ timeout(time: 5, unit: 'MINUTES'){
sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log' sh 'PATH="/opt/rocm/opencl/bin:/opt/rocm/opencl/bin/x86_64:$PATH" clinfo | tee clinfo.log'
if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){ if ( runShell('grep -n "Number of devices:.*. 0" clinfo.log') ){
echo "GPU not found" throw new Exception ("GPU not found")
throw e
} }
else{ else{
echo "GPU is OK" echo "GPU is OK"
...@@ -268,7 +257,7 @@ def runCKProfiler(Map conf=[:]){ ...@@ -268,7 +257,7 @@ def runCKProfiler(Map conf=[:]){
archiveArtifacts "perf_gemm_${gpu_arch}.log" archiveArtifacts "perf_gemm_${gpu_arch}.log"
archiveArtifacts "perf_resnet50_N256_${gpu_arch}.log" archiveArtifacts "perf_resnet50_N256_${gpu_arch}.log"
archiveArtifacts "perf_resnet50_N4_${gpu_arch}.log" archiveArtifacts "perf_resnet50_N4_${gpu_arch}.log"
archiveArtifacts "perf_bathced_gemm_${gpu_arch}.log" archiveArtifacts "perf_batched_gemm_${gpu_arch}.log"
archiveArtifacts "perf_grouped_gemm_${gpu_arch}.log" archiveArtifacts "perf_grouped_gemm_${gpu_arch}.log"
archiveArtifacts "perf_fwd_conv_${gpu_arch}.log" archiveArtifacts "perf_fwd_conv_${gpu_arch}.log"
archiveArtifacts "perf_bwd_conv_${gpu_arch}.log" archiveArtifacts "perf_bwd_conv_${gpu_arch}.log"
...@@ -278,7 +267,7 @@ def runCKProfiler(Map conf=[:]){ ...@@ -278,7 +267,7 @@ def runCKProfiler(Map conf=[:]){
stash name: "perf_gemm_${gpu_arch}.log" stash name: "perf_gemm_${gpu_arch}.log"
stash name: "perf_resnet50_N256_${gpu_arch}.log" stash name: "perf_resnet50_N256_${gpu_arch}.log"
stash name: "perf_resnet50_N4_${gpu_arch}.log" stash name: "perf_resnet50_N4_${gpu_arch}.log"
stash name: "perf_bathced_gemm_${gpu_arch}.log" stash name: "perf_batched_gemm_${gpu_arch}.log"
stash name: "perf_grouped_gemm_${gpu_arch}.log" stash name: "perf_grouped_gemm_${gpu_arch}.log"
stash name: "perf_fwd_conv_${gpu_arch}.log" stash name: "perf_fwd_conv_${gpu_arch}.log"
stash name: "perf_bwd_conv_${gpu_arch}.log" stash name: "perf_bwd_conv_${gpu_arch}.log"
...@@ -362,7 +351,7 @@ def process_results(Map conf=[:]){ ...@@ -362,7 +351,7 @@ def process_results(Map conf=[:]){
unstash "perf_gemm_${gpu_arch}.log" unstash "perf_gemm_${gpu_arch}.log"
unstash "perf_resnet50_N256_${gpu_arch}.log" unstash "perf_resnet50_N256_${gpu_arch}.log"
unstash "perf_resnet50_N4_${gpu_arch}.log" unstash "perf_resnet50_N4_${gpu_arch}.log"
unstash "perf_bathced_gemm_${gpu_arch}.log" unstash "perf_batched_gemm_${gpu_arch}.log"
unstash "perf_grouped_gemm_${gpu_arch}.log" unstash "perf_grouped_gemm_${gpu_arch}.log"
unstash "perf_fwd_conv_${gpu_arch}.log" unstash "perf_fwd_conv_${gpu_arch}.log"
unstash "perf_bwd_conv_${gpu_arch}.log" unstash "perf_bwd_conv_${gpu_arch}.log"
...@@ -389,13 +378,13 @@ def process_results(Map conf=[:]){ ...@@ -389,13 +378,13 @@ def process_results(Map conf=[:]){
} }
//launch develop branch daily at 23:00 in FULL_QA mode //launch develop branch daily at 23:00 in FULL_QA mode
//CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;USE_9110=true''' : "" CRON_SETTINGS = BRANCH_NAME == "develop" ? '''0 23 * * * % RUN_FULL_QA=true;USE_9110=true''' : ""
pipeline { pipeline {
agent none agent none
//triggers { triggers {
// cron(CRON_SETTINGS) parameterizedCron(CRON_SETTINGS)
//} }
options { options {
parallelsAlwaysFailFast() parallelsAlwaysFailFast()
} }
...@@ -467,6 +456,10 @@ pipeline { ...@@ -467,6 +456,10 @@ pipeline {
} }
stage("Run Tests: gfx90a") stage("Run Tests: gfx90a")
{ {
when {
beforeAgent true
expression { params.RUN_FULL_QA.toBoolean() }
}
agent{ label rocmnode("gfx90a")} agent{ label rocmnode("gfx90a")}
environment{ environment{
setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx90a -O3 " -DBUILD_DEV=On """ setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx90a -O3 " -DBUILD_DEV=On """
...@@ -500,6 +493,10 @@ pipeline { ...@@ -500,6 +493,10 @@ pipeline {
{ {
stage("Run ckProfiler: gfx908") stage("Run ckProfiler: gfx908")
{ {
when {
beforeAgent true
expression { !params.RUN_FULL_QA.toBoolean() }
}
agent{ label rocmnode("gfx908")} agent{ label rocmnode("gfx908")}
environment{ environment{
setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " -DBUILD_DEV=On """ setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx908 -O3 " -DBUILD_DEV=On """
...@@ -510,6 +507,10 @@ pipeline { ...@@ -510,6 +507,10 @@ pipeline {
} }
stage("Run ckProfiler: gfx90a") stage("Run ckProfiler: gfx90a")
{ {
when {
beforeAgent true
expression { params.RUN_FULL_QA.toBoolean() }
}
agent{ label rocmnode("gfx90a")} agent{ label rocmnode("gfx90a")}
environment{ environment{
setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx90a -O3 " -DBUILD_DEV=On """ setup_args = """ -D CMAKE_CXX_FLAGS="--offload-arch=gfx90a -O3 " -DBUILD_DEV=On """
...@@ -525,12 +526,20 @@ pipeline { ...@@ -525,12 +526,20 @@ pipeline {
parallel parallel
{ {
stage("Process results for gfx908"){ stage("Process results for gfx908"){
when {
beforeAgent true
expression { !params.RUN_FULL_QA.toBoolean() }
}
agent { label 'mici' } agent { label 'mici' }
steps{ steps{
process_results(gpu_arch: "gfx908") process_results(gpu_arch: "gfx908")
} }
} }
stage("Process results for gfx90a"){ stage("Process results for gfx90a"){
when {
beforeAgent true
expression { params.RUN_FULL_QA.toBoolean() }
}
agent { label 'mici' } agent { label 'mici' }
steps{ steps{
process_results(gpu_arch: "gfx90a") process_results(gpu_arch: "gfx90a")
......
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
make -j $1
DRIVER=example/$1
VERIFY=$2
INIT=$3
REPEAT=$4
# test
######## verify init repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
$DRIVER $VERIFY $INIT $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT 128 256 64 1 1 1 1 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT 256 64 3 7 7 230 230 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT 128 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT 256 64 3 7 7 224 224 2 2 1 1 3 3 3 3
N=$5
# Resnet50
######## verify init repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
#$DRIVER $VERIFY $INIT $REPEAT $N 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 128 128 3 3 58 58 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 512 256 1 1 56 56 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 512 512 3 3 16 16 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 1024 512 1 1 28 28 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $VERIFY $INIT $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
make -j conv_fwd_driver_offline
#make -j conv_bwd_driver_offline
#make -j conv_wrw_driver_offline
DRIVER="./host/driver_offline/conv_fwd_driver_offline"
#DRIVER="./host/driver_offline/conv_bwd_driver_offline"
#DRIVER="./host/driver_offline/conv_wrw_driver_offline"
LAYOUT=$1
ALGO=$2
VERIFY=$3
INIT=$4
LOG=$5
REPEAT=$6
DESIRED_GRID_SIZE=$7
######### layout algo verify init log repeat N__ K___ C___ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 192 3 3 71 71 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 192 3 3 71 71 2 2 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 1024 1 7 17 17 1 1 1 1 0 3 0 3 $DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 128 128 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
$DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 512 192 3 3 35 35 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 16 16 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
$DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
$DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
$DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 32 256 3 3 1 1 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 32 256 1 1 1 1 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
$DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 64 1 1 2 2 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 128 256 128 1 1 2 2 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
# Resnet50
######### layout algo verify init log repeat N__ K___ C___ Y X Hi_ Wi__ Strides Dilations LeftPads RightPads Desired_grid_size__
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 2048 1024 1 1 14 14 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 1024 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 128 128 3 3 28 28 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 128 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 128 128 3 3 58 58 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 2048 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 1024 256 1 1 14 14 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 14 14 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 256 3 3 30 30 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 128 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 256 1 1 56 56 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 64 256 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 16 16 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 1024 512 1 1 28 28 2 2 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 128 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 512 1 1 28 28 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 2048 512 1 1 7 7 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 512 512 3 3 7 7 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 256 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 64 64 1 1 56 56 1 1 1 1 0 0 0 0 $DESIRED_GRID_SIZE
##DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 256 64 64 3 3 56 56 1 1 1 1 1 1 1 1 $DESIRED_GRID_SIZE
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=1
make -j gemm_xdl
DRIVER="./example/gemm_xdl"
VERIFY=$1
INIT=$2
LOG=$3
REPEAT=$4
######### verify init log repeat M___ N___ K___ StrideA StrideB StrideC
#$DRIVER $VERIFY $INIT $LOG $REPEAT 960 1024 1024 1024 1024 1024
#$DRIVER $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024
#$DRIVER $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 2048 2048 2048
$DRIVER $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 4096 4096 4096
#$DRIVER $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 8192 8192 8192
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
make -j $1
DRIVER=example/$1
VERIFY=$2
INIT=$3
REPEAT=$4
######## verify init repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
#$DRIVER $VERIFY $INIT $REPEAT 256 256 256 256 256 256 256
#$DRIVER $VERIFY $INIT $REPEAT 960 1024 1024 1024 1024 1024 1024
#$DRIVER $VERIFY $INIT $REPEAT 1920 2048 2048 2048 2048 2048 2048
$DRIVER $VERIFY $INIT $REPEAT 3840 4096 4096 4096 4096 4096 4096
#$DRIVER $VERIFY $INIT $REPEAT 7680 8192 8192 8192 8192 8192 8192
#$DRIVER $VERIFY $INIT $REPEAT 1024 1024 1024 1024 1024 1024 1024
#$DRIVER $VERIFY $INIT $REPEAT 2048 2048 2048 2048 2048 2048 2048
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
make -j gemm_driver_offline
DRIVER="./host/driver_offline/gemm_driver_offline"
LAYOUT=$1
ALGO=$2
VERIFY=$3
INIT=$4
LOG=$5
REPEAT=$6
M01=$7
N01=$8
######### layout algo verify init log repeat M___ N___ K___ M01_ N01_
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 960 1024 1024 $M01 $N01
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 $M01 $N01
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 $M01 $N01
$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 $M01 $N01
#$DRIVER $LAYOUT $ALGO $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 $M01 $N01
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
make -j $1
DRIVER=example/$1
VERIFY=$2
INIT=$3
REPEAT=$4
# test
######## verify init repeat N__ C___ Y X Hi__ Wi__ Strides LeftPads RightPads
#$DRIVER $VERIFY $INIT $REPEAT 128 192 3 3 71 71 2 2 1 1 1 1
#$DRIVER $VERIFY $INIT $REPEAT 128 64 1 1 1 1 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT 256 3 7 7 230 230 2 2 0 0 0 0
$DRIVER $VERIFY $INIT $REPEAT 256 1024 14 14 14 14 1 1 0 0 0 0
N=$5
# Resnet50
######## verify init repeat N__ C___ Y X Hi__ Wi__ Strides LeftPads RightPads
#$DRIVER $VERIFY $INIT $REPEAT $N 1024 1 1 14 14 2 2 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 1024 1 1 14 14 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 1024 1 1 14 14 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 128 3 3 28 28 1 1 1 1 1 1
#$DRIVER $VERIFY $INIT $REPEAT $N 128 1 1 28 28 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 128 3 3 58 58 2 2 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 2048 1 1 7 7 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 256 1 1 14 14 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 256 3 3 14 14 1 1 1 1 1 1
#$DRIVER $VERIFY $INIT $REPEAT $N 256 3 3 30 30 2 2 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 256 1 1 56 56 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 256 1 1 56 56 2 2 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 256 1 1 56 56 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 512 3 3 16 16 2 2 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 512 1 1 28 28 2 2 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 512 1 1 28 28 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 512 1 1 28 28 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 512 1 1 7 7 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 512 3 3 7 7 1 1 1 1 1 1
#$DRIVER $VERIFY $INIT $REPEAT $N 64 1 1 56 56 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 64 1 1 56 56 1 1 0 0 0 0
#$DRIVER $VERIFY $INIT $REPEAT $N 64 3 3 56 56 1 1 1 1 1 1
#$DRIVER $VERIFY $INIT $REPEAT $N 3 7 7 230 230 2 2 0 0 0 0
...@@ -120,14 +120,14 @@ def parse_logfile(logfile): ...@@ -120,14 +120,14 @@ def parse_logfile(logfile):
res = [x for _,x in sorted(zip(tests,tflops))] res = [x for _,x in sorted(zip(tests,tflops))]
#sorted_kernels = [x for _,x in sorted(zip(tests,kernels))] #sorted_kernels = [x for _,x in sorted(zip(tests,kernels))]
test_list=list(range(1,len(tests)+1)) test_list=list(range(1,len(tests)+1))
#parse fwd_conv performance tests: #parse conv_fwd performance tests:
elif 'fwd_conv' in logfile: elif 'conv_fwd' in logfile:
for line in open(logfile): for line in open(logfile):
if 'tflops:' in line: if 'tflops:' in line:
lst=line.split() lst=line.split()
res.append(lst[1]) res.append(lst[1])
#parse all other performance tests: #parse all other performance tests:
elif 'resnet50' or 'batched_gemm' or 'grouped_gemm' or 'bwd_conv' or 'fusion' or 'reduction' in logfile: elif 'resnet50' or 'batched_gemm' or 'grouped_gemm' or 'conv_bwd_data' or 'gemm_bilinear' or 'reduction' in logfile:
for line in open(logfile): for line in open(logfile):
if 'Best Perf' in line: if 'Best Perf' in line:
lst=line.split() lst=line.split()
...@@ -257,18 +257,18 @@ def main(): ...@@ -257,18 +257,18 @@ def main():
for i in range(1,len(results)+1): for i in range(1,len(results)+1):
testlist.append("Test%i"%i) testlist.append("Test%i"%i)
table_name="ck_grouped_gemm_tflops" table_name="ck_grouped_gemm_tflops"
if 'fwd_conv' in filename: if 'conv_fwd' in filename:
for i in range(1,len(results)+1): for i in range(1,len(results)+1):
testlist.append("Test%i"%i) testlist.append("Test%i"%i)
table_name="ck_fwd_conv_tflops" table_name="ck_conv_fwd_tflops"
if 'bwd_conv' in filename: if 'conv_bwd_data' in filename:
for i in range(1,len(results)+1): for i in range(1,len(results)+1):
testlist.append("Test%i"%i) testlist.append("Test%i"%i)
table_name="ck_bwd_conv_tflops" table_name="ck_conv_bwd_data_tflops"
if 'fusion' in filename: if 'gemm_bilinear' in filename:
for i in range(1,len(results)+1): for i in range(1,len(results)+1):
testlist.append("Test%i"%i) testlist.append("Test%i"%i)
table_name="ck_fusion_tflops" table_name="ck_gemm_bilinear_tflops"
if 'reduction' in filename: if 'reduction' in filename:
for i in range(1,len(results)+1): for i in range(1,len(results)+1):
testlist.append("Test%i"%i) testlist.append("Test%i"%i)
......
...@@ -16,7 +16,7 @@ python3 process_perf_data.py perf_resnet50_N265_"$gpu_arch".log ...@@ -16,7 +16,7 @@ python3 process_perf_data.py perf_resnet50_N265_"$gpu_arch".log
python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log python3 process_perf_data.py perf_resnet50_N4_"$gpu_arch".log
python3 process_perf_data.py perf_batched_gemm_"$gpu_arch".log python3 process_perf_data.py perf_batched_gemm_"$gpu_arch".log
python3 process_perf_data.py perf_grouped_gemm_"$gpu_arch".log python3 process_perf_data.py perf_grouped_gemm_"$gpu_arch".log
python3 process_perf_data.py perf_fwd_conv_"$gpu_arch".log python3 process_perf_data.py perf_conv_fwd_"$gpu_arch".log
python3 process_perf_data.py perf_bwd_conv_"$gpu_arch".log python3 process_perf_data.py perf_conv_bwd_data_"$gpu_arch".log
python3 process_perf_data.py perf_fusion_"$gpu_arch".log python3 process_perf_data.py perf_gemm_bilinear_"$gpu_arch".log
python3 process_perf_data.py perf_reduction_"$gpu_arch".log python3 process_perf_data.py perf_reduction_"$gpu_arch".log
\ No newline at end of file
...@@ -9,7 +9,7 @@ LAYOUT=$3 ...@@ -9,7 +9,7 @@ LAYOUT=$3
VERIFY=$4 VERIFY=$4
INIT=$5 INIT=$5
LOG=$6 LOG=$6
REPEAT=$7 TIME=$7
OP=$1 OP=$1
DATATYPE=$2 DATATYPE=$2
...@@ -17,28 +17,28 @@ LAYOUT=$3 ...@@ -17,28 +17,28 @@ LAYOUT=$3
VERIFY=$4 VERIFY=$4
INIT=$5 INIT=$5
LOG=$6 LOG=$6
REPEAT=$7 TIME=$7
######## op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount ######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 -1 -1 -1 -1 -1 -1 8 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 1024 1024 -1 -1 -1 -1 -1 -1 8
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 -1 -1 -1 -1 -1 -1 8 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1920 2048 2048 -1 -1 -1 -1 -1 -1 8
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 -1 -1 -1 -1 -1 -1 4 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3840 4096 4096 -1 -1 -1 -1 -1 -1 4
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 -1 -1 -1 -1 -1 -1 2 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7680 8192 8192 -1 -1 -1 -1 -1 -1 2
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount ####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024 -1 -1 -1 8 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1024 1024 1024 -1 -1 -1 8
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048 -1 -1 -1 8 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2048 2048 2048 -1 -1 -1 8
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4096 4096 4096 -1 -1 -1 4 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4096 4096 4096 -1 -1 -1 4
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8192 8192 8192 -1 -1 -1 2 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8192 8192 8192 -1 -1 -1 2
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount ####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1056 1056 1056 -1 -1 -1 8 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1056 1056 1056 -1 -1 -1 8
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2080 2080 2080 -1 -1 -1 8 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2080 2080 2080 -1 -1 -1 8
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4128 4128 4128 -1 -1 -1 4 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4128 4128 4128 -1 -1 -1 4
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8224 8224 8224 -1 -1 -1 2 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8224 8224 8224 -1 -1 -1 2
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount ####### op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC BatchStrideA BatchStrideB BatchStrideC BatchCount
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1088 1088 -1 -1 -1 8 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1088 1088 1088 -1 -1 -1 8
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112 -1 -1 -1 8 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2112 2112 2112 -1 -1 -1 8
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160 -1 -1 -1 4 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4160 4160 4160 -1 -1 -1 4
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256 -1 -1 -1 2 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8256 8256 8256 -1 -1 -1 2
\ No newline at end of file
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
DRIVER="../build/bin/ckProfiler"
OP=$1
DATATYPE=$2
IN_LAYOUT=$3
WEI_LAYOUT=$4
OUT_LAYOUT=$5
VERIFY=$6
INIT=$7
LOG=$8
REPEAT=$9
N=${10}
######## op datatype in_layout wei_layout out_layout verify init log repeat N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $IN_LAYOUT $WEI_LAYOUT $OUT_LAYOUT $VERIFY $INIT $LOG $REPEAT $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
DRIVER="../build/bin/ckProfiler"
OP=$1
DATATYPE=$2
LAYOUT=$3
VERIFY=$4
INIT=$5
LOG=$6
TIME=$7
N=$8
# Resnet50
######## op datatype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
DRIVER="../build/bin/ckProfiler"
OP=$1
DATATYPE=$2
LAYOUT=$3
VERIFY=$4
INIT=$5
LOG=$6
TIME=$7
N=$8
# Resnet50
######## op datatype layout verify init log time conv_dim G__ N__ K___ C___ Y X Hi__ Wi__ Strides Dilations LeftPads RightPads
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 1024 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 28 28 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 128 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 128 3 3 56 56 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 2048 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 1024 256 1 1 14 14 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 14 14 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 256 3 3 28 28 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 256 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 14 14 2 2 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 128 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 512 1 1 28 28 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 2048 512 1 1 7 7 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 512 512 3 3 7 7 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 256 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 1 1 56 56 1 1 1 1 0 0 0 0
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 64 3 3 56 56 1 1 1 1 1 1 1 1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2 1 $N 64 3 7 7 224 224 2 2 1 1 3 3 3 3
...@@ -2,7 +2,6 @@ ...@@ -2,7 +2,6 @@
## GPU visibility ## GPU visibility
export HIP_VISIBLE_DEVICES=0 export HIP_VISIBLE_DEVICES=0
#make -j ckProfiler
DRIVER="../build/bin/ckProfiler" DRIVER="../build/bin/ckProfiler"
echo $DRIVER echo $DRIVER
OP=$1 OP=$1
...@@ -11,43 +10,49 @@ LAYOUT=$3 ...@@ -11,43 +10,49 @@ LAYOUT=$3
VERIFY=$4 VERIFY=$4
INIT=$5 INIT=$5
LOG=$6 LOG=$6
REPEAT=$7 TIME=$7
######## op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 256 256 256 256 256 256 # 120 CU
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 1024 1024 1024 ######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 2048 2048 2048 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 1024 1024 -1 -1 -1
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 4096 4096 4096 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 960 2048 2048 -1 -1 -1
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 8192 8192 8192 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1920 1024 2048 -1 -1 -1
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1920 2048 2048 -1 -1 -1
#$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3840 4096 4096 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7680 8192 8192 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 -1 -1 -1 # 104 CU
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 -1 -1 -1 ######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 -1 -1 -1 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 832 1024 1024 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 832 2048 2048 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1664 1024 2048 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1664 2048 2048 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4096 4096 4096 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 3328 4096 4096 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8192 8192 8192 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 6656 8192 8192 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1056 1056 1056 # 110 CU
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2080 2080 2080 ######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4128 4128 4128 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1280 1408 1024 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8224 8224 8224 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1280 2816 2048 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2560 1408 2048 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1088 1088 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2560 2816 2048 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 5120 5632 4096 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 7040 8192 8192 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256
# testing different strides
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 6656 8192 8192 -1 -1 -1 ######## op datatype layout verify init log time M___ N___ K___ StrideA StrideB StrideC
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3328 4096 4096 -1 -1 -1 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1024 1024 1024
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1664 2048 2048 -1 -1 -1 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2048 2048 2048
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 832 1024 1024 -1 -1 -1 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4096 4096 4096
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8192 8192 8192
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7040 8192 8192 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 5120 5632 4096 -1 -1 -1 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1056 1056 1056
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2560 2816 2048 -1 -1 -1 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2080 2080 2080
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1280 1408 1024 -1 -1 -1 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4128 4128 4128
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8224 8224 8224
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 1024 1024 1024 1088 1088 1088
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 2048 2048 2048 2112 2112 2112
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 4096 4096 4096 4160 4160 4160
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 8192 8192 8192 8256 8256 8256
#!/bin/bash
## GPU visibility
export HIP_VISIBLE_DEVICES=0
DRIVER="../build/bin/ckProfiler"
OP=$1
DATATYPE=$2
LAYOUT=$3
VERIFY=$4
INIT=$5
LOG=$6
REPEAT=$7
######## op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 960 1024 1024 -1 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1920 2048 2048 -1 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 3840 4096 4096 -1 -1 -1 -1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 7680 8192 8192 -1 -1 -1 -1
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1024 1024 1024 1024
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2048 2048 2048 2048
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4096 4096 4096 4096
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8192 8192 8192 8192
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1056 1056 1056 1056
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2080 2080 2080 2080
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4128 4128 4128 4128
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8224 8224 8224 8224
####### op datatype layout verify init log repeat M___ N___ K___ StrideA StrideB StrideC StrideC1
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 1024 1024 1024 1088 1088 1088 1088
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 2048 2048 2048 2112 2112 2112 2112
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 4096 4096 4096 4160 4160 4160 4160
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 8192 8192 8192 8256 8256 8256 8256
\ No newline at end of file
...@@ -9,10 +9,10 @@ LAYOUT=$3 ...@@ -9,10 +9,10 @@ LAYOUT=$3
VERIFY=$4 VERIFY=$4
INIT=$5 INIT=$5
LOG=$6 LOG=$6
REPEAT=$7 TIME=$7
######## op datatype layout verify init log repeat Ms______________ Ns______________ Ks_____________ StrideAs___________ StrideBs__________ StrideCs___________ ######## op datatype layout verify init log time Ms______________ Ns______________ Ks_____________ StrideAs___________ StrideBs__________ StrideCs___________
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 256,512,1024,768 128,256,384,1024 128,192,256,512 1024,1025,1044,1026 1024,1024,1024,1024 1025,1024,1028,1024 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 256,512,1024,768 128,256,384,1024 128,192,256,512 1024,1025,1044,1026 1024,1024,1024,1024 1025,1024,1028,1024
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 512,768,2048,128 128,256,384,1024 128,192,256,512 1024,1025,2053,1026 1024,1024,1024,1024 1025,1024,2054,1024 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 512,768,2048,128 128,256,384,1024 128,192,256,512 1024,1025,2053,1026 1024,1024,1024,1024 1025,1024,2054,1024
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 256,512,1024,768 512,256,768,1024 128,192,256,512 1024,1045,1034,1026 1024,1024,1024,1024 1025,1063,1028,1024 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 256,512,1024,768 512,256,768,1024 128,192,256,512 1024,1045,1034,1026 1024,1024,1024,1024 1025,1063,1028,1024
$DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $REPEAT 512,768,4096,768 128,768,512,2048 128,192,256,512 1024,1027,4096,2050 1024,1024,1024,2048 1025,1024,4099,2049 $DRIVER $OP $DATATYPE $LAYOUT $VERIFY $INIT $LOG $TIME 512,768,4096,768 128,768,512,2048 128,192,256,512 1024,1027,4096,2050 1024,1024,1024,2048 1025,1024,4099,2049
\ No newline at end of file
...@@ -16,10 +16,10 @@ elif [ -n $PRECISION ] && [ "$PRECISION" = "--int8" ]; then ...@@ -16,10 +16,10 @@ elif [ -n $PRECISION ] && [ "$PRECISION" = "--int8" ]; then
fi fi
#### 0 - ADD, 5 - AVG, 7 - NORM2 #### 0 - ADD, 5 - AVG, 7 - NORM2
Operations="0 5 7" Operations="0 5"
#### 0 - ADD, 5 - AVG, for int8, no NORM2 supported #### 0 - ADD, 5 - AVG, for int8, no NORM2 supported
if [ -n $PRECISION ] && [ "$PRECISION" = "--int8" ]; then if [ -n $PRECISION ] && [ "$PRECISION" = "--int8" -o "$PRECISION" = "--half" ]; then
Operations=5 Operations=5
fi fi
......
This diff is collapsed.
...@@ -40,82 +40,82 @@ function print_log_header(){ ...@@ -40,82 +40,82 @@ function print_log_header(){
#run gemm tests #run gemm tests
export gemm_log="perf_gemm_${gpu_arch}.log" export gemm_log="perf_gemm_${gpu_arch}.log"
print_log_header $gemm_log $env_type $branch $host_name print_log_header $gemm_log $env_type $branch $host_name
./profile_gemm.sh gemm 0 0 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 0 0 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 1 0 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 1 0 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 2 0 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 2 0 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 3 0 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 3 0 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 0 1 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 0 1 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 1 1 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 1 1 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 2 1 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 2 1 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 3 1 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 3 1 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 0 2 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 0 2 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 1 2 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 1 2 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 2 2 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 2 2 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 3 2 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 3 2 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 0 3 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 0 3 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 1 3 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 1 3 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 2 3 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 2 3 $verify 1 0 1 | tee -a $gemm_log
./profile_gemm.sh gemm 3 3 $verify 1 0 5 | tee -a $gemm_log ./profile_gemm.sh gemm 3 3 $verify 1 0 1 | tee -a $gemm_log
#run resnet50 tests
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
print_log_header $resnet256_log $env_type $branch $host_name
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 2 0 1 256 | tee -a $resnet256_log
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
print_log_header $resnet4_log $env_type $branch $host_name
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 2 0 1 4 | tee -a $resnet4_log
#run batched_gemm tests #run batched_gemm tests
export batched_gemm_log="perf_batched_gemm_${gpu_arch}.log" export batched_gemm_log="perf_batched_gemm_${gpu_arch}.log"
print_log_header $batched_gemm_log $env_type $branch $host_name print_log_header $batched_gemm_log $env_type $branch $host_name
./profile_batched_gemm.sh batched_gemm 0 0 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 0 0 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 0 1 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 0 1 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 0 2 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 0 2 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 0 3 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 0 3 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 1 0 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 1 0 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 1 1 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 1 1 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 1 2 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 1 2 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 1 3 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 1 3 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 2 0 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 2 0 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 2 1 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 2 1 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 2 2 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 2 2 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 2 3 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 2 3 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 3 0 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 3 0 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 3 1 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 3 1 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 3 2 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 3 2 $verify 1 0 1 | tee -a $batched_gemm_log
./profile_batched_gemm.sh batched_gemm 3 3 $verify 2 0 5 | tee -a $batched_gemm_log ./profile_batched_gemm.sh batched_gemm 3 3 $verify 1 0 1 | tee -a $batched_gemm_log
#run grouped_gemm tests #run grouped_gemm tests
export grouped_gemm_log="perf_grouped_gemm_${gpu_arch}.log" export grouped_gemm_log="perf_grouped_gemm_${gpu_arch}.log"
print_log_header $grouped_gemm_log $env_type $branch $host_name print_log_header $grouped_gemm_log $env_type $branch $host_name
./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 2 0 5 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 0 $verify 1 0 1 | tee -a $grouped_gemm_log
./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 2 0 5 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 1 $verify 1 0 1 | tee -a $grouped_gemm_log
./profile_grouped_gemm.sh grouped_gemm 1 2 $verify 2 0 5 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 2 $verify 1 0 1 | tee -a $grouped_gemm_log
./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 2 0 5 | tee -a $grouped_gemm_log ./profile_grouped_gemm.sh grouped_gemm 1 3 $verify 1 0 1 | tee -a $grouped_gemm_log
#run fwd_conv tests #run GEMM+Bilinear tests
export fwd_conv_log="perf_fwd_conv_${gpu_arch}.log" export gemm_bilinear_log="perf_gemm_bilinear_${gpu_arch}.log"
print_log_header $fwd_conv_log $env_type $branch $host_name print_log_header $gemm_bilinear_log $env_type $branch $host_name
./profile_conv.sh conv_fwd 0 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log ./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 1 0 1 | tee -a $gemm_bilinear_log
./profile_conv.sh conv_fwd 1 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log ./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 1 0 1 | tee -a $gemm_bilinear_log
./profile_conv.sh conv_fwd 2 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log ./profile_gemm_bilinear.sh gemm_bilinear 1 2 $verify 1 0 1 | tee -a $gemm_bilinear_log
./profile_conv.sh conv_fwd 3 1 $verify 2 0 5 2 256 | tee -a $fwd_conv_log ./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 1 0 1 | tee -a $gemm_bilinear_log
#run bwd_conv tests #run conv_fwd tests
export bwd_conv_log="perf_bwd_conv_${gpu_arch}.log" export conv_fwd_log="perf_conv_fwd_${gpu_arch}.log"
print_log_header $bwd_conv_log $env_type $branch $host_name print_log_header $conv_fwd_log $env_type $branch $host_name
./profile_conv.sh conv2d_bwd_data 0 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log ./profile_conv_fwd.sh conv_fwd 0 1 $verify 1 0 1 256 | tee -a $conv_fwd_log
./profile_conv.sh conv2d_bwd_data 1 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log ./profile_conv_fwd.sh conv_fwd 1 1 $verify 1 0 1 256 | tee -a $conv_fwd_log
./profile_conv.sh conv2d_bwd_data 2 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log ./profile_conv_fwd.sh conv_fwd 2 1 $verify 1 0 1 256 | tee -a $conv_fwd_log
./profile_conv.sh conv2d_bwd_data 3 1 1 1 $verify 2 0 5 128 | tee -a $bwd_conv_log ./profile_conv_fwd.sh conv_fwd 3 1 $verify 1 0 1 256 | tee -a $conv_fwd_log
#run fusion tests #run conv_bwd_data tests
export fusion_log="perf_fusion_${gpu_arch}.log" export conv_bwd_data_log="perf_conv_bwd_data_${gpu_arch}.log"
print_log_header $fusion_log $env_type $branch $host_name print_log_header $conv_bwd_data_log $env_type $branch $host_name
./profile_gemm_bilinear.sh gemm_bilinear 1 0 $verify 2 0 1 | tee -a $fusion_log ./profile_conv_bwd_data.sh conv_bwd_data 0 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log
./profile_gemm_bilinear.sh gemm_bilinear 1 1 $verify 2 0 1 | tee -a $fusion_log ./profile_conv_bwd_data.sh conv_bwd_data 1 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log
./profile_gemm_bilinear.sh gemm_bilinear 1 2 $verify 2 0 1 | tee -a $fusion_log ./profile_conv_bwd_data.sh conv_bwd_data 2 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log
./profile_gemm_bilinear.sh gemm_bilinear 1 3 $verify 2 0 1 | tee -a $fusion_log ./profile_conv_bwd_data.sh conv_bwd_data 3 1 $verify 1 0 1 256 | tee -a $conv_bwd_data_log
#run resnet50 tests
export resnet256_log="perf_resnet50_N256_${gpu_arch}.log"
print_log_header $resnet256_log $env_type $branch $host_name
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 256 | tee -a $resnet256_log
export resnet4_log="perf_resnet50_N4_${gpu_arch}.log"
print_log_header $resnet4_log $env_type $branch $host_name
./profile_resnet50.sh conv_fwd_bias_relu 1 1 1 1 $verify 1 0 1 4 | tee -a $resnet4_log
#run reduction tests #run reduction tests
export reduction_log="perf_reduction_${gpu_arch}.log" export reduction_log="perf_reduction_${gpu_arch}.log"
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment