Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
44078dba
Commit
44078dba
authored
Jun 01, 2021
by
Jing Zhang
Browse files
clean code
parent
cc77ab57
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
6 additions
and
147 deletions
+6
-147
composable_kernel/include/tensor_operation/xdlops_gemm.hpp
composable_kernel/include/tensor_operation/xdlops_gemm.hpp
+2
-0
composable_kernel/include/utility/amd_xdlops.hpp
composable_kernel/include/utility/amd_xdlops.hpp
+4
-147
No files found.
composable_kernel/include/tensor_operation/xdlops_gemm.hpp
View file @
44078dba
...
...
@@ -324,6 +324,7 @@ struct mfma_info<mfma_instr::mfma_f32_4x4x4f16>
}
};
#if 0
template <>
struct mfma_info<mfma_instr::mfma_f32_32x32x2bf16>
{
...
...
@@ -489,6 +490,7 @@ struct mfma_info<mfma_instr::mfma_f32_4x4x2bf16>
return intrin_mfma_f32_4x4x2bf16<MPerXdlops, NPerXdlops>::run(p_a, p_b, reg_c);
}
};
#endif
template
<
mfma_instr
instr
,
index_t
MPerXdlops_
,
index_t
NPerXdlops_
>
struct
xdlops_info
...
...
composable_kernel/include/utility/amd_xdlops.hpp
View file @
44078dba
...
...
@@ -5,153 +5,6 @@
namespace
ck
{
// Holder for a union that groups four float32_t values and overlays them
// with a flat array of floats.
struct c_vec32_4_t
{
    union VecType
    {
        // Named view: four float32_t members x, y, z, w.
        struct
        {
            float32_t x;
            float32_t y;
            float32_t z;
            float32_t w;
        } s;

        // Flat view sharing the same storage as `s`.
        // NOTE(review): presumably float32_t is a 32-wide float vector so that
        // 4 * 32 == 128 -- confirm against the float32_t definition.
        float n[128];
    };

    // Builds a VecType and assigns 0 to each of s.x, s.y, s.z and s.w.
    __host__ __device__ static VecType CreateVecZero()
    {
        VecType zero;
        zero.s.x = 0;
        zero.s.y = 0;
        zero.s.z = 0;
        zero.s.w = 0;
        return zero;
    }
};
// Holder for a union that groups two float32_t values and overlays them
// with a flat array of floats. Unlike a pure type holder, this struct also
// owns one instance of the union as data member `l`.
struct c_vec32_2_t
{
    union VecType
    {
        // Named view: two float32_t members x, y.
        struct
        {
            float32_t x;
            float32_t y;
        } s;

        // Flat view sharing the same storage as `s`.
        // NOTE(review): presumably float32_t is a 32-wide float vector so that
        // 2 * 32 == 64 -- confirm against the float32_t definition.
        float n[64];
    } l; // data member of type VecType

    // Builds a VecType and assigns 0 to both s.x and s.y.
    __host__ __device__ static VecType CreateVecZero()
    {
        VecType zero;
        zero.s.x = 0;
        zero.s.y = 0;
        return zero;
    }
};
// Holder for a union that groups two c_vec32_2_t objects and overlays them
// with a flat array of floats.
struct c_vec32_2_2_t
{
    union VecType
    {
        // Named view: two c_vec32_2_t members x, y; each one carries its own
        // union in data member `l`.
        struct
        {
            c_vec32_2_t x;
            c_vec32_2_t y;
        } s;

        // Flat view sharing the same storage as `s`.
        float n[128];
    };

    // Builds a VecType and assigns 0 to the x and y parts of both nested
    // c_vec32_2_t members, in the order x.x, x.y, y.x, y.y.
    __host__ __device__ static VecType CreateVecZero()
    {
        VecType zero;

        // first nested pair
        zero.s.x.l.s.x = 0;
        zero.s.x.l.s.y = 0;

        // second nested pair
        zero.s.y.l.s.x = 0;
        zero.s.y.l.s.y = 0;

        return zero;
    }
};
// Holder for a union that wraps a single float32_t and overlays it with a
// flat array of floats.
struct c_vec32_1_t
{
    union VecType
    {
        // Named view: one float32_t member x.
        struct
        {
            float32_t x;
        } s;

        // Flat view sharing the same storage as `s`.
        // NOTE(review): presumably float32_t is a 32-wide float vector -- confirm.
        float n[32];
    };

    // Builds a VecType and assigns 0 to s.x.
    __host__ __device__ static VecType CreateVecZero()
    {
        VecType zero;
        zero.s.x = 0;
        return zero;
    }
};
// Holder for a union that wraps a single float16_t and overlays it with a
// flat array of floats.
struct c_vec16_1_t
{
    union VecType
    {
        // Named view: one float16_t member x.
        struct
        {
            float16_t x;
        } s;

        // Flat view sharing the same storage as `s`.
        // NOTE(review): presumably float16_t is a 16-wide float vector (not a
        // half-precision scalar) -- confirm against its definition.
        float n[16];
    };

    // Builds a VecType and assigns 0 to s.x.
    __host__ __device__ static VecType CreateVecZero()
    {
        VecType zero;
        zero.s.x = 0;
        return zero;
    }
};
// Holder for a union that groups two float4_t values and overlays them with
// a flat array of floats.
struct c_vec4_2_t
{
    union VecType
    {
        // Named view: two float4_t members x, y.
        struct
        {
            float4_t x;
            float4_t y;
        } s;

        // Flat view sharing the same storage as `s`.
        // NOTE(review): presumably float4_t is a 4-wide float vector so that
        // 2 * 4 == 8 -- confirm against the float4_t definition.
        float n[8];
    };

    // Builds a VecType and assigns 0 to both s.x and s.y.
    __host__ __device__ static VecType CreateVecZero()
    {
        VecType zero;
        zero.s.x = 0;
        zero.s.y = 0;
        return zero;
    }
};
// Holder for a union that wraps a single float4_t and overlays it with a
// flat array of floats.
struct c_vec4_1_t
{
    union VecType
    {
        // Named view: one float4_t member x.
        struct
        {
            float4_t x;
        } s;

        // Flat view sharing the same storage as `s`.
        // NOTE(review): presumably float4_t is a 4-wide float vector -- confirm.
        float n[4];
    };

    // Builds a VecType and assigns 0 to s.x.
    __host__ __device__ static VecType CreateVecZero()
    {
        VecType zero;
        zero.s.x = 0;
        return zero;
    }
};
// Device-side declaration bound, via __asm, to the symbol name
// "llvm.amdgcn.mfma.f32.32x32x1f32".
// Operand order: A, B, C, cbsz, abid, blgp
extern "C" __device__ float32_t llvm_intrin_amdgcn_mfma_f32_32x32x1f32(
    float a, float b, float32_t c, int cbsz, int abid, int blgp)
    __asm("llvm.amdgcn.mfma.f32.32x32x1f32");
...
...
@@ -499,6 +352,7 @@ struct intrin_mfma_f32_4x4x4f16<8, 64, COffset>
}
};
#if 0
template <index_t MPerWave, index_t NPerWave, index_t AStride, index_t BStride>
struct intrin_mfma_f32_32x32x2bf16;
...
...
@@ -638,5 +492,8 @@ struct intrin_mfma_f32_4x4x2bf16<8, 64>
return reg_c;
}
};
#endif
}
// namespace ck
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment