Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
7582c18e
Commit
7582c18e
authored
Oct 10, 2023
by
muozturk
Browse files
bug fix
parent
f0a8ee84
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
80 additions
and
73 deletions
+80
-73
example/complex_contraction/4D_kernel.hpp
example/complex_contraction/4D_kernel.hpp
+4
-0
example/complex_contraction/Makefile
example/complex_contraction/Makefile
+1
-1
example/complex_contraction/main.cpp
example/complex_contraction/main.cpp
+75
-72
No files found.
example/complex_contraction/4D_kernel.hpp
View file @
7582c18e
...
...
@@ -5,6 +5,10 @@
#include <sys/time.h>
#include <locale.h>
#include <algorithm>
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
using
namespace
std
;
// created by tc_gen_definition_new()
...
...
example/complex_contraction/Makefile
View file @
7582c18e
4D_kernel
:
hipcc
-O3
--offload-arch
=
gfx90a main.cpp
4D_kernel.hpp
-o
$@
hipcc
-O3
--offload-arch
=
gfx90a main.cpp
-o
$@
clean
:
...
...
example/complex_contraction/main.cpp
View file @
7582c18e
...
...
@@ -4,6 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "4D_kernel.hpp"
//#define DEBUG_CORRECTNESS
//#define DEBUG_SIMPLE_CORRECTNESS
...
...
@@ -11,78 +12,6 @@
void
pre_Initializing_Input_Tensors
();
void
post_Correctness
();
//
// t3 [a,16,b,16,c,16,d,16] += sum(e,16,f,16) * t2 [a,e,b,f] * v2 [d,f,c,e];
//
// Parses `text` as a strictly positive base-10 integer that fits in an int.
// Returns 1 and stores the value in *out on success; returns 0 (leaving *out
// untouched) for empty, non-numeric, trailing-garbage, zero, negative or
// too-large input.
static int parse_extent(const char* text, int* out)
{
    const long max_int = (long)(~0u >> 1); // largest int, without needing <limits.h>
    char* end          = NULL;
    const long value   = strtol(text, &end, 10);

    if(end == text || *end != '\0' || value <= 0 || value > max_int)
    {
        return 0;
    }
    *out = (int)value;
    return 1;
}

// Multiplies four positive extents. Returns 1 and stores the product in *out
// when it fits in an int; returns 0 when the product would overflow.
static int element_count(int w, int x, int y, int z, int* out)
{
    const long long max_int = (long long)(~0u >> 1);
    const int dims[4]       = {w, x, y, z};
    long long total         = 1;

    for(int i = 0; i < 4; ++i)
    {
        // total <= max_int before each multiply, so the product fits in long long.
        total *= dims[i];
        if(total > max_int)
        {
            return 0;
        }
    }
    *out = (int)total;
    return 1;
}

/**
 * Driver for the contraction
 *   t3 [a,b,c,d] += sum(e,f) * t2 [a,e,b,f] * v2 [d,f,c,e]
 *
 * Usage: <program> [a b c d e f]
 *   When exactly six arguments are supplied they are used as the extents of
 *   the indices a..f; with any other argument count every extent stays at 16.
 *
 * Returns 0 on success, EXIT_FAILURE when an extent is not a positive integer,
 * when a tensor's element count does not fit in an int, or when a host buffer
 * cannot be allocated.
 */
int main(int argc, char** argv)
{
    // Problem size (defaults).
    int size_idx_a = 16;
    int size_idx_b = 16;
    int size_idx_c = 16;
    int size_idx_d = 16;
    int size_idx_e = 16;
    int size_idx_f = 16;

    if(argc == 7)
    {
        int* const extents[6] = {
            &size_idx_a, &size_idx_b, &size_idx_c, &size_idx_d, &size_idx_e, &size_idx_f};

        for(int i = 0; i < 6; ++i)
        {
            if(!parse_extent(argv[i + 1], extents[i]))
            {
                fprintf(stderr,
                        "Invalid extent '%s': expected a positive integer\n",
                        argv[i + 1]);
                return EXIT_FAILURE;
            }
        }
    }

    // t3 [a,b,c,d] += sum(e,f) * t2 [a,e,b,f] * v2 [d,f,c,e];
    int size_C = 0;
    int size_A = 0;
    int size_B = 0;
    if(!element_count(size_idx_a, size_idx_b, size_idx_c, size_idx_d, &size_C) ||
       !element_count(size_idx_a, size_idx_e, size_idx_b, size_idx_f, &size_A) ||
       !element_count(size_idx_d, size_idx_f, size_idx_c, size_idx_e, &size_B))
    {
        fprintf(stderr, "Problem size too large: a tensor element count overflows int\n");
        return EXIT_FAILURE;
    }

    // Host buffers: output, reference output for the optional check, and both inputs.
    float* host_C     = (float*)malloc(sizeof(float) * size_C);
    float* host_C_chk = (float*)malloc(sizeof(float) * size_C);
    float* host_A     = (float*)malloc(sizeof(float) * size_A);
    float* host_B     = (float*)malloc(sizeof(float) * size_B);

    if(host_C == NULL || host_C_chk == NULL || host_A == NULL || host_B == NULL)
    {
        fprintf(stderr, "Failed to allocate host tensors\n");
        free(host_C); // free(NULL) is a no-op, so no per-pointer guard is needed
        free(host_C_chk);
        free(host_A);
        free(host_B);
        return EXIT_FAILURE;
    }

    printf("==========================================================================================================\n");
    printf(">>> abcd-aebf-dfce\n");
    printf(">>> t3 [a,16,b,16,c,16,d,16] += sum(e,16,f,16) * t2 [a,e,b,f] * v2 [d,f,c,e];\n");
    printf(">>> Problem Size (a,b,c,d) and (e,f): (%2d,%2d,%2d,%2d) and (%2d,%2d)\n",
           size_idx_a,
           size_idx_b,
           size_idx_c,
           size_idx_d,
           size_idx_e,
           size_idx_f);
    printf("==========================================================================================================\n");

    // Initialize the output buffers and the two input tensors.
    pre_Initializing_Input_Tensors(host_C, host_C_chk, size_C, host_A, size_A, host_B, size_B);

    // Run the kernels.
    // NOTE(review): the meaning of the trailing (1, -1) arguments is defined by
    // sd_t_d2_fusion_ and is not visible here - confirm before changing them.
    sd_t_d2_fusion_(size_idx_a,
                    size_idx_b,
                    size_idx_c,
                    size_idx_d,
                    size_idx_e,
                    size_idx_f,
                    host_C,
                    host_A,
                    host_B,
                    1,
                    -1);

#ifdef DEBUG_CORRECTNESS
    // Correctness check
    post_Correctness(host_C,
                     host_C_chk,
                     host_A,
                     host_B,
                     size_idx_a,
                     size_idx_b,
                     size_idx_c,
                     size_idx_d,
                     size_idx_e,
                     size_idx_f);
#endif

    free(host_C);
    free(host_C_chk);
    free(host_A);
    free(host_B);

    return 0;
}
// Initialize t3 (t3_temp), 9 t2 and 9 v2.
void
pre_Initializing_Input_Tensors
(
float
*
h_C
,
float
*
h_C_chk
,
int
size_C
,
float
*
h_A
,
int
size_A
,
float
*
h_B
,
int
size_B
)
{
...
...
@@ -164,3 +93,77 @@ void post_Correctness(float* h_C, float* h_C_chk, float* h_A, float* h_B, int si
printf
(
" >>> Total Operations: %'lld
\n
"
,
tmp_ops
*
2
);
printf
(
"====================================================================================================
\n
"
);
}
//
// t3 [a,16,b,16,c,16,d,16] += sum(e,16,f,16) * t2 [a,e,b,f] * v2 [d,f,c,e];
//
// Parses `text` as a strictly positive base-10 integer that fits in an int.
// Returns 1 and stores the value in *out on success; returns 0 (leaving *out
// untouched) for empty, non-numeric, trailing-garbage, zero, negative or
// too-large input.
static int parse_extent(const char* text, int* out)
{
    const long max_int = (long)(~0u >> 1); // largest int, without needing <limits.h>
    char* end          = NULL;
    const long value   = strtol(text, &end, 10);

    if(end == text || *end != '\0' || value <= 0 || value > max_int)
    {
        return 0;
    }
    *out = (int)value;
    return 1;
}

// Multiplies four positive extents. Returns 1 and stores the product in *out
// when it fits in an int; returns 0 when the product would overflow.
static int element_count(int w, int x, int y, int z, int* out)
{
    const long long max_int = (long long)(~0u >> 1);
    const int dims[4]       = {w, x, y, z};
    long long total         = 1;

    for(int i = 0; i < 4; ++i)
    {
        // total <= max_int before each multiply, so the product fits in long long.
        total *= dims[i];
        if(total > max_int)
        {
            return 0;
        }
    }
    *out = (int)total;
    return 1;
}

/**
 * Driver for the contraction
 *   t3 [a,b,c,d] += sum(e,f) * t2 [a,e,b,f] * v2 [d,f,c,e]
 *
 * Usage: <program> [a b c d e f]
 *   When exactly six arguments are supplied they are used as the extents of
 *   the indices a..f; with any other argument count every extent stays at 16.
 *
 * Returns 0 on success, EXIT_FAILURE when an extent is not a positive integer,
 * when a tensor's element count does not fit in an int, or when a host buffer
 * cannot be allocated.
 */
int main(int argc, char** argv)
{
    // Problem size (defaults).
    int size_idx_a = 16;
    int size_idx_b = 16;
    int size_idx_c = 16;
    int size_idx_d = 16;
    int size_idx_e = 16;
    int size_idx_f = 16;

    if(argc == 7)
    {
        int* const extents[6] = {
            &size_idx_a, &size_idx_b, &size_idx_c, &size_idx_d, &size_idx_e, &size_idx_f};

        for(int i = 0; i < 6; ++i)
        {
            if(!parse_extent(argv[i + 1], extents[i]))
            {
                fprintf(stderr,
                        "Invalid extent '%s': expected a positive integer\n",
                        argv[i + 1]);
                return EXIT_FAILURE;
            }
        }
    }

    // t3 [a,b,c,d] += sum(e,f) * t2 [a,e,b,f] * v2 [d,f,c,e];
    int size_C = 0;
    int size_A = 0;
    int size_B = 0;
    if(!element_count(size_idx_a, size_idx_b, size_idx_c, size_idx_d, &size_C) ||
       !element_count(size_idx_a, size_idx_e, size_idx_b, size_idx_f, &size_A) ||
       !element_count(size_idx_d, size_idx_f, size_idx_c, size_idx_e, &size_B))
    {
        fprintf(stderr, "Problem size too large: a tensor element count overflows int\n");
        return EXIT_FAILURE;
    }

    // Host buffers: output, reference output for the optional check, and both inputs.
    float* host_C     = (float*)malloc(sizeof(float) * size_C);
    float* host_C_chk = (float*)malloc(sizeof(float) * size_C);
    float* host_A     = (float*)malloc(sizeof(float) * size_A);
    float* host_B     = (float*)malloc(sizeof(float) * size_B);

    if(host_C == NULL || host_C_chk == NULL || host_A == NULL || host_B == NULL)
    {
        fprintf(stderr, "Failed to allocate host tensors\n");
        free(host_C); // free(NULL) is a no-op, so no per-pointer guard is needed
        free(host_C_chk);
        free(host_A);
        free(host_B);
        return EXIT_FAILURE;
    }

    printf("==========================================================================================================\n");
    printf(">>> abcd-aebf-dfce\n");
    printf(">>> t3 [a,16,b,16,c,16,d,16] += sum(e,16,f,16) * t2 [a,e,b,f] * v2 [d,f,c,e];\n");
    printf(">>> Problem Size (a,b,c,d) and (e,f): (%2d,%2d,%2d,%2d) and (%2d,%2d)\n",
           size_idx_a,
           size_idx_b,
           size_idx_c,
           size_idx_d,
           size_idx_e,
           size_idx_f);
    printf("==========================================================================================================\n");

    // Initialize the output buffers and the two input tensors.
    pre_Initializing_Input_Tensors(host_C, host_C_chk, size_C, host_A, size_A, host_B, size_B);

    // Run the kernels.
    // NOTE(review): the meaning of the trailing (1, -1) arguments is defined by
    // sd_t_d2_fusion_ and is not visible here - confirm before changing them.
    sd_t_d2_fusion_(size_idx_a,
                    size_idx_b,
                    size_idx_c,
                    size_idx_d,
                    size_idx_e,
                    size_idx_f,
                    host_C,
                    host_A,
                    host_B,
                    1,
                    -1);

#ifdef DEBUG_CORRECTNESS
    // Correctness check
    post_Correctness(host_C,
                     host_C_chk,
                     host_A,
                     host_B,
                     size_idx_a,
                     size_idx_b,
                     size_idx_c,
                     size_idx_d,
                     size_idx_e,
                     size_idx_f);
#endif

    free(host_C);
    free(host_C_chk);
    free(host_A);
    free(host_B);

    return 0;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment