yangql / composable_kernel-1

Commit 82fae390
authored Jul 30, 2021 by Chao Liu

update to clang-format-10

parent bd27ed6c

Showing 10 changed files with 38 additions and 50 deletions
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp    +12 −14
composable_kernel/include/tensor_description/tensor_adaptor.hpp               +10 −12
composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v3.hpp         +0 −1
composable_kernel/include/tensor_operation/threadwise_contraction_dlops.hpp    +0 −2
composable_kernel/include/tensor_operation/threadwise_gemm_dlops_v3.hpp        +0 −1
composable_kernel/include/utility/data_type_enum.hpp                           +2 −1
external/half/include/half.hpp                                                 +6 −6
host/host_tensor/include/host_tensor.hpp                                       +6 −9
host/online_compilation/hip_utility/kernel_cache.cpp                           +1 −3
host/online_compilation/hip_utility/logger.cpp                                 +1 −1
composable_kernel/include/tensor_description/dynamic_tensor_descriptor.hpp
@@ -33,13 +33,11 @@ struct DynamicTensorDescriptor

    __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
    {
        constexpr auto all_low_dim_ids = unpack(
            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionIdss{});

        constexpr auto all_up_dim_ids = unpack(
            [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionIdss{});

        constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
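The unpack + merge_sequences idiom above flattens a tuple of per-transform id sequences into one sequence whose size is the hidden-dimension count. A minimal stand-alone sketch of the same idea, using std::index_sequence and std::apply in place of CK's Sequence, Tuple and unpack (all names below are illustrative stand-ins, not CK's API):

    #include <tuple>
    #include <utility>

    // Concatenate two compile-time index sequences, as CK's merge_sequences
    // does for its own Sequence type.
    template <std::size_t... Is, std::size_t... Js>
    constexpr auto merge(std::index_sequence<Is...>, std::index_sequence<Js...>)
    {
        return std::index_sequence<Is..., Js...>{};
    }

    int main()
    {
        // A tuple of per-transform dimension-id sequences, standing in for
        // LowerDimensionIdss{} above.
        constexpr auto idss =
            std::make_tuple(std::index_sequence<0, 1>{}, std::index_sequence<2>{});

        // std::apply plays the role of unpack: it calls the lambda with the
        // tuple's elements, which are then merged into the sequence {0, 1, 2}.
        constexpr auto all_ids =
            std::apply([](auto... xs) { return merge(xs...); }, idss);

        static_assert(all_ids.size() == 3, "three dimension ids in total");
    }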
@@ -347,22 +345,22 @@ transform_dynamic_tensor_descriptor(const OldTensorDescriptor& old_tensor_desc,

    constexpr auto up_dim_numbers_scan = merge_sequences(
        Sequence<0>{}, inclusive_scan_sequence(up_dim_numbers, math::plus<index_t>{}, Number<0>{}));

    constexpr auto up_dim_hidden_idss = generate_tuple(
        [old_hidden_dim_number, up_dim_numbers_scan](auto i) constexpr {
            return typename arithmetic_sequence_gen<old_hidden_dim_number + up_dim_numbers_scan[i],
                                                    old_hidden_dim_number + up_dim_numbers_scan[i + 1],
                                                    1>::type{};
        },
        Number<num_new_transform>{});

    // new visible dimension's hidden ids
    constexpr auto unordered_new_visible_dim_hidden_ids = unpack(
        [](auto... xs) constexpr { return merge_sequences(xs...); }, up_dim_hidden_idss);

    constexpr auto new_visible_dim_unordered2ordered = unpack(
        [](auto... xs) constexpr { return merge_sequences(xs...); }, NewUpperDimensionNewVisibleIdss{});

    constexpr auto new_visible_dim_hidden_ids =
        unordered_new_visible_dim_hidden_ids.ReorderGivenOld2New(new_visible_dim_unordered2ordered);
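Here inclusive_scan_sequence turns per-transform upper-dimension counts into running offsets, and prepending Sequence<0> makes entry i the first hidden id (before the old_hidden_dim_number shift) owned by transform i. A run-time sketch of the same computation over std::array, with hypothetical names (C++17):

    #include <array>
    #include <cstddef>

    // Running offsets from per-transform dimension counts: out[i] is the sum
    // of counts[0..i), so transform i owns the half-open id range
    // [out[i], out[i + 1]).
    template <std::size_t N>
    constexpr std::array<std::size_t, N + 1> scan_offsets(std::array<std::size_t, N> counts)
    {
        std::array<std::size_t, N + 1> out{};
        for(std::size_t i = 0; i < N; ++i)
            out[i + 1] = out[i] + counts[i];
        return out;
    }

    // Three transforms producing 2, 1 and 3 upper dimensions get the id
    // ranges [0,2), [2,3) and [3,6), the ranges arithmetic_sequence_gen
    // generates above.
    static_assert(scan_offsets<3>({2, 1, 3})[1] == 2);
    static_assert(scan_offsets<3>({2, 1, 3})[3] == 6);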
composable_kernel/include/tensor_description/tensor_adaptor.hpp
@@ -106,13 +106,13 @@ struct TensorAdaptor

    __host__ __device__ static constexpr index_t GetNumOfHiddenDimension()
    {
        constexpr auto all_low_dim_ids = unpack(
            [](auto&&... xs) constexpr { return merge_sequences(xs...); },
            LowerDimensionHiddenIdss{});

        constexpr auto all_up_dim_ids = unpack(
            [](auto&&... xs) constexpr { return merge_sequences(xs...); },
            UpperDimensionHiddenIdss{});

        constexpr auto all_dim_ids = merge_sequences(all_low_dim_ids, all_up_dim_ids);
@@ -418,13 +418,11 @@ __host__ __device__ constexpr auto make_single_stage_tensor_adaptor(const Transf

                  "wrong!");

    // sanity check on LowerDimensionOldTopIdss and UpperDimensionNewTopIdss
    constexpr auto all_low_dim_old_top_ids = unpack(
        [](auto&&... xs) constexpr { return merge_sequences(xs...); }, LowerDimensionOldTopIdss{});

    constexpr auto all_up_dim_new_top_ids = unpack(
        [](auto&&... xs) constexpr { return merge_sequences(xs...); }, UpperDimensionNewTopIdss{});

    static_assert(is_valid_sequence_map<decltype(all_low_dim_old_top_ids)>::value &&
                      is_valid_sequence_map<decltype(all_up_dim_new_top_ids)>::value,
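The is_valid_sequence_map assertion above checks that the collected ids form a permutation of 0..N-1, i.e. every top dimension is covered exactly once. A run-time sketch of that predicate (illustrative, not CK's implementation):

    #include <array>
    #include <cstddef>

    // A sequence of N ids is a valid "map" when it is a permutation of
    // 0..N-1: every id in range, no id repeated.
    template <std::size_t N>
    constexpr bool is_valid_map(std::array<std::size_t, N> s)
    {
        std::array<bool, N> seen{};
        for(std::size_t i = 0; i < N; ++i)
        {
            if(s[i] >= N || seen[s[i]])
                return false; // out of range, or the same id used twice
            seen[s[i]] = true;
        }
        return true;
    }

    static_assert(is_valid_map<3>({2, 0, 1}));  // a permutation: accepted
    static_assert(!is_valid_map<3>({0, 0, 2})); // id 0 repeated: rejected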
composable_kernel/include/tensor_operation/blockwise_gemm_dlops_v3.hpp
@@ -152,7 +152,6 @@ struct BlockwiseGemmDlops_km_kn_m0m1n0n1_v3

        static_for<0, EPerBlock, EPerThreadLoop>{}([&](auto e_begin) {
            static_for<0, KPerThread, KPerThreadSubC>{}([&](auto k_begin) {
                a_thread_copy_.Run(a_block_mtx,
                                   make_tuple(e_begin, k_begin),
                                   a_block_buf,
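static_for runs its body once per index and delivers the index as a compile-time constant, which is what lets e_begin and k_begin index compile-time descriptors. A minimal C++17 sketch of the pattern, with begin fixed at 0 and step at 1 for brevity (CK's static_for<begin, end, step> is more general):

    #include <cstddef>
    #include <utility>

    template <typename F, std::size_t... Is>
    constexpr void static_for_impl(F&& f, std::index_sequence<Is...>)
    {
        // Fold over the comma operator: call f once per index, each time with
        // a distinct integral_constant type, so the index stays usable in
        // constexpr contexts inside the body.
        (f(std::integral_constant<std::size_t, Is>{}), ...);
    }

    template <std::size_t N, typename F>
    constexpr void static_for_n(F&& f)
    {
        static_for_impl(std::forward<F>(f), std::make_index_sequence<N>{});
    }

    int main()
    {
        // Nested use, mirroring the e_begin / k_begin loops above: inside the
        // body, decltype(e)::value is a compile-time constant.
        static_for_n<2>([&](auto e) {
            static_for_n<3>([&](auto k) {
                constexpr std::size_t flat = decltype(e)::value * 3 + decltype(k)::value;
                static_assert(flat < 6);
            });
        });
    }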
composable_kernel/include/tensor_operation/threadwise_contraction_dlops.hpp
@@ -87,7 +87,6 @@ struct ThreadwiseGemmDlops_km0m1_kn0n1_m0m1n0n1

        static_for<0, TM1, 1>{}([&](auto tm1) {
            static_for<0, TN0, 1>{}([&](auto tn0) {
                static_for<0, TN1, 1>{}([&](auto tn1) {
                    constexpr index_t a_offset = AThreadDesc_TK0_TM0_TM1_TK1{}.CalculateOffset(
                        a_origin_idx + make_multi_index(tk, tm0, tm1));
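CalculateOffset maps a multi-index to a linear offset through the descriptor's transform chain; for a plain strided layout it reduces to a dot product of index and strides. A hedged sketch of that simplest case only (CK descriptors also handle composed transforms, which this ignores):

    #include <array>
    #include <cstddef>

    // Linear offset of a multi-index in a strided 3-d layout: the special
    // case of what AThreadDesc_TK0_TM0_TM1_TK1{}.CalculateOffset(...) computes
    // through its transforms.
    constexpr std::size_t offset_3d(std::array<std::size_t, 3> idx,
                                    std::array<std::size_t, 3> strides)
    {
        return idx[0] * strides[0] + idx[1] * strides[1] + idx[2] * strides[2];
    }

    // A 4 x 2 x 8 row-major tensor has strides {16, 8, 1}; element (1, 0, 3)
    // lives at offset 19.
    static_assert(offset_3d({1, 0, 3}, {16, 8, 1}) == 19);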
@@ -192,7 +191,6 @@ struct ThreadwiseContractionDlops_A_TK0_TM0_TM1_TK1_B_TK0_TN0_TN1_TK1_C_TM0_TM1_

        static_for<0, TM1, 1>{}([&](auto tm1) {
            static_for<0, TN0, 1>{}([&](auto tn0) {
                static_for<0, TN1, 1>{}([&](auto tn1) {
                    vector_type<FloatA, TK1> a_vec;
                    vector_type<FloatB, TK1> b_vec;
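vector_type<FloatA, TK1> bundles TK1 scalars so a whole K-slice can be loaded and multiplied as one unit. A bare-bones stand-in to show the shape of the idea (CK's real vector_type is GPU-aware and supports reinterpretation as native vector types; this is only a sketch):

    #include <cstddef>

    // Minimal fixed-width vector wrapper: N scalars of type T stored
    // contiguously, so a K-slice can be copied or iterated as one object.
    template <typename T, std::size_t N>
    struct vec
    {
        T data[N] = {};

        constexpr T& operator[](std::size_t i) { return data[i]; }
        constexpr const T& operator[](std::size_t i) const { return data[i]; }
    };

    // Dot product of two K-slices: the per-(tm1, tn0, tn1) work done by the
    // contraction loop above.
    template <typename T, std::size_t N>
    constexpr T dot(const vec<T, N>& a, const vec<T, N>& b)
    {
        T acc{};
        for(std::size_t i = 0; i < N; ++i)
            acc += a[i] * b[i];
        return acc;
    }

    static_assert(dot(vec<int, 2>{{1, 2}}, vec<int, 2>{{3, 4}}) == 11);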
composable_kernel/include/tensor_operation/threadwise_gemm_dlops_v3.hpp
@@ -136,7 +136,6 @@ struct ThreadwiseGemmDlops_km_kn_mn_v3

    {
        static_for<0, H, 1>{}([&](auto h) {
            static_for<0, W, 1>{}([&](auto w) {
                constexpr index_t b_offset =
                    BDesc{}.CalculateOffset(b_origin_idx + make_tuple(e, 0, h, w));
composable_kernel/include/utility/data_type_enum.hpp
@@ -4,7 +4,8 @@

namespace ck {

// this enumerate should be synchronized with include/miopen.h
typedef enum
{
    Half  = 0,
    Float = 1,
    Int32 = 2,
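Since this enum must stay in sync with include/miopen.h, a typical consumer switches on it to recover per-element properties. A hypothetical helper sketch, not part of this commit (the enum name and values below mirror the hunk above):

    #include <cstddef>

    enum DataTypeEnumSketch
    {
        Half  = 0,
        Float = 1,
        Int32 = 2,
    };

    // Hypothetical helper: bytes per element for each enum value.
    constexpr std::size_t element_size(DataTypeEnumSketch t)
    {
        switch(t)
        {
        case Half: return 2;  // 16-bit float
        case Float: return 4; // 32-bit float
        case Int32: return 4; // 32-bit int
        }
        return 0; // unreachable for valid enum values
    }

    static_assert(element_size(Half) == 2);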
external/half/include/half.hpp
@@ -2399,11 +2399,11 @@ unsigned int erf(unsigned int arg)

template <std::float_round_style R, bool L>
unsigned int gamma(unsigned int arg)
{
    /* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544,
    -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837,
    0.0114684895434781459556 }; double t = arg + 4.65, s = p[0];
    for(unsigned int i=0; i<5; ++i)
        s += p[i+1] / (arg+i);
    return std::log(s) + (arg-0.5)*std::log(t) - t;
    */
    static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0);
    unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
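The commented-out block is a disabled Lanczos-style approximation of log Γ; the live code path works in the f31 fixed-point format instead. For reference, the commented recipe as runnable double-precision code (restored verbatim from the comment above, not from CK):

    #include <cmath>

    // log(gamma(arg)) via the coefficients quoted in the comment; shown only
    // to make the disabled recipe concrete.
    double log_gamma(double arg)
    {
        static const double p[] = {2.50662827563479526904,
                                   225.525584619175212544,
                                   -268.295973841304927459,
                                   80.9030806934622512966,
                                   -5.00757863970517583837,
                                   0.0114684895434781459556};
        double t = arg + 4.65, s = p[0];
        for(unsigned int i = 0; i < 5; ++i)
            s += p[i + 1] / (arg + i);
        return std::log(s) + (arg - 0.5) * std::log(t) - t;
    }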
@@ -2506,7 +2506,7 @@ unsigned int gamma(unsigned int arg)

template <typename, typename, std::float_round_style>
struct half_caster;
} // namespace detail

/// Half-precision floating-point type.
/// This class implements an IEEE-conformant half-precision floating-point type with the usual
host/host_tensor/include/host_tensor.hpp
@@ -39,7 +39,8 @@ std::ostream& LogRangeAsType(std::ostream& os, Range&& range, std::string delim)

    return os;
}

typedef enum
{
    Half  = 0,
    Float = 1,
} DataType_t;
@@ -227,27 +228,23 @@ struct Tensor

    {
        switch(mDesc.GetNumOfDimension())
        {
        case 1: {
            auto f = [&](auto i) { (*this)(i) = g(i); };
            make_ParallelTensorFunctor(f, mDesc.GetLengths()[0])(num_thread);
            break;
        }
        case 2: {
            auto f = [&](auto i0, auto i1) { (*this)(i0, i1) = g(i0, i1); };
            make_ParallelTensorFunctor(f, mDesc.GetLengths()[0], mDesc.GetLengths()[1])(num_thread);
            break;
        }
        case 3: {
            auto f = [&](auto i0, auto i1, auto i2) { (*this)(i0, i1, i2) = g(i0, i1, i2); };
            make_ParallelTensorFunctor(
                f, mDesc.GetLengths()[0], mDesc.GetLengths()[1], mDesc.GetLengths()[2])(num_thread);
            break;
        }
        case 4: {
            auto f = [&](auto i0, auto i1, auto i2, auto i3) {
                (*this)(i0, i1, i2, i3) = g(i0, i1, i2, i3);
            };
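The switch dispatches on tensor rank because each arity needs a lambda of matching signature for make_ParallelTensorFunctor. A small sketch of the same shape, single-threaded and for rank 2 only (hypothetical names; the real functor splits the index space across num_thread threads):

    #include <array>
    #include <cstddef>
    #include <vector>

    // Single-threaded sketch of the case 2 branch above: visit every (i0, i1)
    // and store g(i0, i1), without make_ParallelTensorFunctor's threading.
    template <typename T, typename G>
    std::vector<T> generate_rank2(std::array<std::size_t, 2> lengths, G g)
    {
        std::vector<T> out(lengths[0] * lengths[1]);
        for(std::size_t i0 = 0; i0 < lengths[0]; ++i0)
            for(std::size_t i1 = 0; i1 < lengths[1]; ++i1)
                out[i0 * lengths[1] + i1] = g(i0, i1); // row-major placement
        return out;
    }

    int main()
    {
        // Fill a 2 x 3 tensor with i0 * 10 + i1.
        auto t = generate_rank2<int>({2, 3}, [](auto i0, auto i1) {
            return static_cast<int>(i0 * 10 + i1);
        });
        return t[5] == 12 ? 0 : 1; // element (1, 2)
    }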
host/online_compilation/hip_utility/kernel_cache.cpp
@@ -145,9 +145,7 @@ void KernelCache::ClearKernels(const std::string& algorithm, const std::string&

    }
    const std::pair<std::string, std::string> key = std::make_pair(algorithm, network_config);
    auto&& v = this->kernel_map[key];
    if(!v.empty())
    {
    }
    v.clear();
}
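ClearKernels drops the kernels compiled for one (algorithm, network_config) pair while leaving the rest of the cache intact. A stripped-down sketch of a cache with that shape (hypothetical types; the real KernelCache stores HIP kernel handles):

    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    // Kernels cached per (algorithm, network_config) key, as in KernelCache.
    struct KernelCacheSketch
    {
        using Key = std::pair<std::string, std::string>;
        std::map<Key, std::vector<std::string>> kernel_map; // values stand in for kernel handles

        void ClearKernels(const std::string& algorithm, const std::string& network_config)
        {
            // operator[] creates an empty entry when the key is absent, which
            // the code above relies on as well before clearing.
            kernel_map[std::make_pair(algorithm, network_config)].clear();
        }
    };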
host/online_compilation/hip_utility/logger.cpp
@@ -40,4 +40,4 @@ ostream& fdt_log(LogLevel level, const char* header, const char* content)

ostream& fdt_log() { return (cerr); };

void fdt_log_flush() { cerr << endl; }

}; // namespace olCompile