"vscode:/vscode.git/clone" did not exist on "35b2971ebb7bc4b498301b1aaeea8b201cecf8dd"
Commit 7582c18e authored by muozturk's avatar muozturk
Browse files

bug fix

parent f0a8ee84
...@@ -5,6 +5,10 @@ ...@@ -5,6 +5,10 @@
#include <sys/time.h> #include <sys/time.h>
#include <locale.h> #include <locale.h>
#include <algorithm> #include <algorithm>
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
using namespace std; using namespace std;
// created by tc_gen_definition_new() // created by tc_gen_definition_new()
......
4D_kernel: 4D_kernel:
hipcc -O3 --offload-arch=gfx90a main.cpp 4D_kernel.hpp -o $@ hipcc -O3 --offload-arch=gfx90a main.cpp -o $@
clean: clean:
......
...@@ -4,6 +4,7 @@ ...@@ -4,6 +4,7 @@
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "4D_kernel.hpp"
//#define DEBUG_CORRECTNESS //#define DEBUG_CORRECTNESS
//#define DEBUG_SIMPLE_CORRECTNESS //#define DEBUG_SIMPLE_CORRECTNESS
...@@ -11,78 +12,6 @@ ...@@ -11,78 +12,6 @@
void pre_Initializing_Input_Tensors(); void pre_Initializing_Input_Tensors();
void post_Correctness(); void post_Correctness();
//
// Host driver for the tensor contraction
//   t3 [a,16,b,16,c,16,d,16] += sum(e,16,f,16) * t2 [a,e,b,f] * v2 [d,f,c,e];
//
// Usage: <program> [a b c d e f]
//   When exactly six arguments are given they are parsed with atoi() as the
//   extents of indices a..f; for any other argument count every extent is 16.
//
// Returns 0 on success and 1 when an extent is not positive, an element count
// does not fit in an int, or a host allocation fails.
//
int main(int argc, char** argv)
{
    // Index extents (problem size); defaults apply unless all six are supplied.
    int size_idx_a = 16;
    int size_idx_b = 16;
    int size_idx_c = 16;
    int size_idx_d = 16;
    int size_idx_e = 16;
    int size_idx_f = 16;

    if (argc == 7)
    {
        size_idx_a = atoi(argv[1]);
        size_idx_b = atoi(argv[2]);
        size_idx_c = atoi(argv[3]);
        size_idx_d = atoi(argv[4]);
        size_idx_e = atoi(argv[5]);
        size_idx_f = atoi(argv[6]);
    }

    // atoi() returns 0 for non-numeric text and passes negative values through;
    // either would make the element counts below zero or negative.
    if (size_idx_a <= 0 || size_idx_b <= 0 || size_idx_c <= 0 ||
        size_idx_d <= 0 || size_idx_e <= 0 || size_idx_f <= 0)
    {
        fprintf(stderr, ">>> Error: all six index extents must be positive integers\n");
        return 1;
    }

    // Largest value an int can hold, derived without pulling in another header
    // (assumes int has no padding bits). The element counts are handed on as int.
    const long long max_elems = static_cast<long long>(~0u >> 1);

    // Extents of each tensor, in the index order of the contraction string:
    // t3 [a,b,c,d], t2 [a,e,b,f], v2 [d,f,c,e].
    const int extents_C[4] = { size_idx_a, size_idx_b, size_idx_c, size_idx_d };
    const int extents_A[4] = { size_idx_a, size_idx_e, size_idx_b, size_idx_f };
    const int extents_B[4] = { size_idx_d, size_idx_f, size_idx_c, size_idx_e };
    const int* const tensor_extents[3] = { extents_C, extents_A, extents_B };

    int elem_counts[3];
    for (int t = 0; t < 3; t++)
    {
        long long count = 1;
        for (int i = 0; i < 4; i++)
        {
            // Both factors are at most max_elems, so the product cannot overflow
            // a long long before it is range-checked.
            count *= tensor_extents[t][i];
            if (count > max_elems)
            {
                fprintf(stderr, ">>> Error: problem size too large (element count exceeds int range)\n");
                return 1;
            }
        }
        elem_counts[t] = static_cast<int>(count);
    }
    const int size_C = elem_counts[0];
    const int size_A = elem_counts[1];
    const int size_B = elem_counts[2];

    // Host buffers: output, reference output for the correctness check, and the two inputs.
    float* host_C     = static_cast<float*>(malloc(sizeof(float) * size_C));
    float* host_C_chk = static_cast<float*>(malloc(sizeof(float) * size_C));
    float* host_A     = static_cast<float*>(malloc(sizeof(float) * size_A));
    float* host_B     = static_cast<float*>(malloc(sizeof(float) * size_B));
    if (host_C == NULL || host_C_chk == NULL || host_A == NULL || host_B == NULL)
    {
        fprintf(stderr, ">>> Error: failed to allocate host tensors\n");
        // free(NULL) is a no-op, so releasing all four unconditionally is safe.
        free(host_C);
        free(host_C_chk);
        free(host_A);
        free(host_B);
        return 1;
    }

    printf ("==========================================================================================================\n");
    printf (">>> abcd-aebf-dfce\n");
    printf (">>> t3 [a,16,b,16,c,16,d,16] += sum(e,16,f,16) * t2 [a,e,b,f] * v2 [d,f,c,e];\n");
    printf (">>> Problem Size (a,b,c,d) and (e,f): (%2d,%2d,%2d,%2d) and (%2d,%2d)\n", size_idx_a, size_idx_b, size_idx_c, size_idx_d, size_idx_e, size_idx_f);
    printf ("==========================================================================================================\n");

    // Initialize the output (and its check copy) and the two input tensors.
    pre_Initializing_Input_Tensors(host_C, host_C_chk, size_C, host_A, size_A, host_B, size_B);

    // Run the kernels.
    // NOTE(review): the meaning of the trailing arguments (1, -1) is defined by
    // sd_t_d2_fusion_, which is not visible here -- confirm against its declaration.
    sd_t_d2_fusion_(size_idx_a, size_idx_b, size_idx_c, size_idx_d, size_idx_e, size_idx_f, host_C, host_A, host_B, 1, -1);

#ifdef DEBUG_CORRECTNESS
    // Correctness check; compiled in only when DEBUG_CORRECTNESS is defined.
    post_Correctness(host_C, host_C_chk, host_A, host_B, size_idx_a, size_idx_b, size_idx_c, size_idx_d, size_idx_e, size_idx_f);
#endif

    // Release host buffers.
    free(host_C);
    free(host_C_chk);
    free(host_A);
    free(host_B);

    return 0;
}
// Initialize t3 (t3_temp), 9 t2 and 9 v2. // Initialize t3 (t3_temp), 9 t2 and 9 v2.
void pre_Initializing_Input_Tensors(float* h_C, float* h_C_chk, int size_C, float* h_A, int size_A, float* h_B, int size_B) void pre_Initializing_Input_Tensors(float* h_C, float* h_C_chk, int size_C, float* h_A, int size_A, float* h_B, int size_B)
{ {
...@@ -164,3 +93,77 @@ void post_Correctness(float* h_C, float* h_C_chk, float* h_A, float* h_B, int si ...@@ -164,3 +93,77 @@ void post_Correctness(float* h_C, float* h_C_chk, float* h_A, float* h_B, int si
printf (" >>> Total Operations: %'lld\n", tmp_ops * 2); printf (" >>> Total Operations: %'lld\n", tmp_ops * 2);
printf ("====================================================================================================\n"); printf ("====================================================================================================\n");
} }
//
// Host driver for the tensor contraction
//   t3 [a,16,b,16,c,16,d,16] += sum(e,16,f,16) * t2 [a,e,b,f] * v2 [d,f,c,e];
//
// Usage: <program> [a b c d e f]
//   When exactly six arguments are given they are parsed with atoi() as the
//   extents of indices a..f; for any other argument count every extent is 16.
//
// Returns 0 on success and 1 when an extent is not positive, an element count
// does not fit in an int, or a host allocation fails.
//
int main(int argc, char** argv)
{
    // Index extents (problem size); defaults apply unless all six are supplied.
    int size_idx_a = 16;
    int size_idx_b = 16;
    int size_idx_c = 16;
    int size_idx_d = 16;
    int size_idx_e = 16;
    int size_idx_f = 16;

    if (argc == 7)
    {
        size_idx_a = atoi(argv[1]);
        size_idx_b = atoi(argv[2]);
        size_idx_c = atoi(argv[3]);
        size_idx_d = atoi(argv[4]);
        size_idx_e = atoi(argv[5]);
        size_idx_f = atoi(argv[6]);
    }

    // atoi() returns 0 for non-numeric text and passes negative values through;
    // either would make the element counts below zero or negative.
    if (size_idx_a <= 0 || size_idx_b <= 0 || size_idx_c <= 0 ||
        size_idx_d <= 0 || size_idx_e <= 0 || size_idx_f <= 0)
    {
        fprintf(stderr, ">>> Error: all six index extents must be positive integers\n");
        return 1;
    }

    // Largest value an int can hold, derived without pulling in another header
    // (assumes int has no padding bits). The element counts are handed on as int.
    const long long max_elems = static_cast<long long>(~0u >> 1);

    // Extents of each tensor, in the index order of the contraction string:
    // t3 [a,b,c,d], t2 [a,e,b,f], v2 [d,f,c,e].
    const int extents_C[4] = { size_idx_a, size_idx_b, size_idx_c, size_idx_d };
    const int extents_A[4] = { size_idx_a, size_idx_e, size_idx_b, size_idx_f };
    const int extents_B[4] = { size_idx_d, size_idx_f, size_idx_c, size_idx_e };
    const int* const tensor_extents[3] = { extents_C, extents_A, extents_B };

    int elem_counts[3];
    for (int t = 0; t < 3; t++)
    {
        long long count = 1;
        for (int i = 0; i < 4; i++)
        {
            // Both factors are at most max_elems, so the product cannot overflow
            // a long long before it is range-checked.
            count *= tensor_extents[t][i];
            if (count > max_elems)
            {
                fprintf(stderr, ">>> Error: problem size too large (element count exceeds int range)\n");
                return 1;
            }
        }
        elem_counts[t] = static_cast<int>(count);
    }
    const int size_C = elem_counts[0];
    const int size_A = elem_counts[1];
    const int size_B = elem_counts[2];

    // Host buffers: output, reference output for the correctness check, and the two inputs.
    float* host_C     = static_cast<float*>(malloc(sizeof(float) * size_C));
    float* host_C_chk = static_cast<float*>(malloc(sizeof(float) * size_C));
    float* host_A     = static_cast<float*>(malloc(sizeof(float) * size_A));
    float* host_B     = static_cast<float*>(malloc(sizeof(float) * size_B));
    if (host_C == NULL || host_C_chk == NULL || host_A == NULL || host_B == NULL)
    {
        fprintf(stderr, ">>> Error: failed to allocate host tensors\n");
        // free(NULL) is a no-op, so releasing all four unconditionally is safe.
        free(host_C);
        free(host_C_chk);
        free(host_A);
        free(host_B);
        return 1;
    }

    printf ("==========================================================================================================\n");
    printf (">>> abcd-aebf-dfce\n");
    printf (">>> t3 [a,16,b,16,c,16,d,16] += sum(e,16,f,16) * t2 [a,e,b,f] * v2 [d,f,c,e];\n");
    printf (">>> Problem Size (a,b,c,d) and (e,f): (%2d,%2d,%2d,%2d) and (%2d,%2d)\n", size_idx_a, size_idx_b, size_idx_c, size_idx_d, size_idx_e, size_idx_f);
    printf ("==========================================================================================================\n");

    // Initialize the output (and its check copy) and the two input tensors.
    pre_Initializing_Input_Tensors(host_C, host_C_chk, size_C, host_A, size_A, host_B, size_B);

    // Run the kernels.
    // NOTE(review): the meaning of the trailing arguments (1, -1) is defined by
    // sd_t_d2_fusion_, which is not visible here -- confirm against its declaration.
    sd_t_d2_fusion_(size_idx_a, size_idx_b, size_idx_c, size_idx_d, size_idx_e, size_idx_f, host_C, host_A, host_B, 1, -1);

#ifdef DEBUG_CORRECTNESS
    // Correctness check; compiled in only when DEBUG_CORRECTNESS is defined.
    post_Correctness(host_C, host_C_chk, host_A, host_B, size_idx_a, size_idx_b, size_idx_c, size_idx_d, size_idx_e, size_idx_f);
#endif

    // Release host buffers.
    free(host_C);
    free(host_C_chk);
    free(host_A);
    free(host_B);

    return 0;
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment