Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
b79df771
Commit
b79df771
authored
Jul 12, 2022
by
carlushuang
Browse files
Merge remote-tracking branch 'origin/develop' into cpu_avx2
parents
05d38218
63914743
Changes
450
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
129 additions
and
57 deletions
+129
-57
include/ck/utility/container_element_picker.hpp
include/ck/utility/container_element_picker.hpp
+3
-0
include/ck/utility/container_helper.hpp
include/ck/utility/container_helper.hpp
+3
-0
include/ck/utility/data_type.hpp
include/ck/utility/data_type.hpp
+16
-4
include/ck/utility/debug.hpp
include/ck/utility/debug.hpp
+12
-4
include/ck/utility/dynamic_buffer.hpp
include/ck/utility/dynamic_buffer.hpp
+5
-1
include/ck/utility/enable_if.hpp
include/ck/utility/enable_if.hpp
+4
-3
include/ck/utility/functional.hpp
include/ck/utility/functional.hpp
+6
-5
include/ck/utility/functional2.hpp
include/ck/utility/functional2.hpp
+6
-5
include/ck/utility/functional3.hpp
include/ck/utility/functional3.hpp
+9
-7
include/ck/utility/functional4.hpp
include/ck/utility/functional4.hpp
+3
-0
include/ck/utility/generic_memory_space_atomic.hpp
include/ck/utility/generic_memory_space_atomic.hpp
+3
-0
include/ck/utility/get_id.hpp
include/ck/utility/get_id.hpp
+5
-1
include/ck/utility/ignore.hpp
include/ck/utility/ignore.hpp
+3
-0
include/ck/utility/inner_product.hpp
include/ck/utility/inner_product.hpp
+3
-0
include/ck/utility/integral_constant.hpp
include/ck/utility/integral_constant.hpp
+4
-3
include/ck/utility/is_known_at_compile_time.hpp
include/ck/utility/is_known_at_compile_time.hpp
+5
-4
include/ck/utility/magic_division.hpp
include/ck/utility/magic_division.hpp
+5
-5
include/ck/utility/math.hpp
include/ck/utility/math.hpp
+23
-5
include/ck/utility/math_v2.hpp
include/ck/utility/math_v2.hpp
+7
-6
include/ck/utility/multi_index.hpp
include/ck/utility/multi_index.hpp
+4
-4
No files found.
Too many changes to show.
To preserve performance only
450 of 450+
files are displayed.
Plain diff
Email patch
include/ck/utility/container_element_picker.hpp
View file @
b79df771
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_CONTAINER_ELEMENT_PICKER_HPP
#define CK_CONTAINER_ELEMENT_PICKER_HPP
...
...
include/ck/utility/container_helper.hpp
View file @
b79df771
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_CONTAINER_HELPER_HPP
#define CK_CONTAINER_HELPER_HPP
...
...
include/ck/utility/data_type.hpp
View file @
b79df771
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "statically_indexed_array.hpp"
#include "ck/utility/statically_indexed_array.hpp"
#ifdef CK_NOGPU
#include "half.hpp"
#endif
...
...
@@ -942,14 +946,14 @@ using int8x64_t = typename vector_type<int8_t, 64>::type;
// Convert X to Y
template
<
typename
Y
,
typename
X
>
__host__
__device__
Y
type_convert
(
X
x
)
__host__
__device__
constexpr
Y
type_convert
(
X
x
)
{
return
static_cast
<
Y
>
(
x
);
}
// convert bfp16 to fp32
template
<
>
inline
__host__
__device__
float
type_convert
<
float
,
bhalf_t
>
(
bhalf_t
x
)
inline
__host__
__device__
constexpr
float
type_convert
<
float
,
bhalf_t
>
(
bhalf_t
x
)
{
union
{
...
...
@@ -962,7 +966,7 @@ inline __host__ __device__ float type_convert<float, bhalf_t>(bhalf_t x)
// convert fp32 to bfp16
template
<
>
inline
__host__
__device__
bhalf_t
type_convert
<
bhalf_t
,
float
>
(
float
x
)
inline
__host__
__device__
constexpr
bhalf_t
type_convert
<
bhalf_t
,
float
>
(
float
x
)
{
union
{
...
...
@@ -1014,6 +1018,11 @@ struct NumericLimits
__host__
__device__
static
constexpr
T
Max
()
{
return
std
::
numeric_limits
<
T
>::
max
();
}
__host__
__device__
static
constexpr
T
Lowest
()
{
return
std
::
numeric_limits
<
T
>::
lowest
();
}
__host__
__device__
static
constexpr
T
QuietNaN
()
{
return
std
::
numeric_limits
<
T
>::
quiet_NaN
();
}
};
template
<
>
...
...
@@ -1022,12 +1031,15 @@ struct NumericLimits<half_t>
static
constexpr
unsigned
short
binary_min
=
0x0400
;
static
constexpr
unsigned
short
binary_max
=
0x7BFF
;
static
constexpr
unsigned
short
binary_lowest
=
0xFBFF
;
static
constexpr
unsigned
short
binary_qnan
=
0x7FFF
;
__host__
__device__
static
constexpr
half_t
Min
()
{
return
bit_cast
<
half_t
>
(
binary_min
);
}
__host__
__device__
static
constexpr
half_t
Max
()
{
return
bit_cast
<
half_t
>
(
binary_max
);
}
__host__
__device__
static
constexpr
half_t
Lowest
()
{
return
bit_cast
<
half_t
>
(
binary_lowest
);
}
__host__
__device__
static
constexpr
half_t
QuietNaN
()
{
return
bit_cast
<
half_t
>
(
binary_qnan
);
}
};
}
// namespace ck
include/ck/utility/debug.hpp
View file @
b79df771
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef UTILITY_DEBUG_HPP
#define UTILITY_DEBUG_HPP
#ifndef CK_NOGPU
...
...
@@ -9,21 +12,27 @@ template <typename T, typename Enable = void>
struct
PrintAsType
;
template
<
typename
T
>
struct
PrintAsType
<
T
,
typename
std
::
enable_if
<
std
::
is_floating_point
<
T
>::
value
>::
valu
e
>
struct
PrintAsType
<
T
,
typename
std
::
enable_if
<
std
::
is_floating_point
<
T
>::
value
>::
typ
e
>
{
using
type
=
float
;
__host__
__device__
static
void
Print
(
const
T
&
p
)
{
printf
(
"%.3f "
,
static_cast
<
type
>
(
p
));
}
};
template
<
>
struct
PrintAsType
<
ck
::
half_t
,
void
>
{
using
type
=
float
;
__host__
__device__
static
void
Print
(
const
ck
::
half_t
&
p
)
{
printf
(
"%.3f "
,
static_cast
<
type
>
(
p
));
}
};
template
<
typename
T
>
struct
PrintAsType
<
T
,
typename
std
::
enable_if
<
std
::
is_integral
<
T
>::
value
>::
valu
e
>
struct
PrintAsType
<
T
,
typename
std
::
enable_if
<
std
::
is_integral
<
T
>::
value
>::
typ
e
>
{
using
type
=
int
;
__host__
__device__
static
void
Print
(
const
T
&
p
)
{
printf
(
"%d "
,
static_cast
<
type
>
(
p
));
}
};
}
// namespace detail
...
...
@@ -38,7 +47,6 @@ struct PrintAsType<T, typename std::enable_if<std::is_integral<T>::value>::value
template
<
typename
T
,
index_t
element_stride
=
1
,
index_t
row_bytes
=
128
>
__device__
void
print_shared
(
T
const
*
p_shared
,
index_t
num_elements
)
{
using
PrintType
=
typename
detail
::
PrintAsType
<
T
>::
type
;
constexpr
index_t
row_elements
=
row_bytes
/
sizeof
(
T
);
static_assert
((
element_stride
>=
1
&&
element_stride
<=
row_elements
),
"element_stride should between [1, row_elements]"
);
...
...
@@ -60,7 +68,7 @@ __device__ void print_shared(T const* p_shared, index_t num_elements)
printf
(
"elem %5d: "
,
i
);
for
(
index_t
j
=
0
;
j
<
row_elements
;
j
+=
element_stride
)
{
printf
(
"%.0f "
,
static_cast
<
PrintType
>
(
p_shared
[
i
+
j
])
)
;
detail
::
Print
As
Type
<
T
>::
Print
(
p_shared
[
i
+
j
]);
}
printf
(
"
\n
"
);
...
...
include/ck/utility/dynamic_buffer.hpp
View file @
b79df771
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "config.hpp"
#include "ck/ck.hpp"
#include "enable_if.hpp"
#include "c_style_pointer_cast.hpp"
#include "amd_buffer_addressing.hpp"
...
...
include/ck/utility/enable_if.hpp
View file @
b79df771
#ifndef CK_ENABLE_IF_HPP
#define CK_ENABLE_IF_HPP
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
namespace
ck
{
...
...
@@ -10,4 +12,3 @@ template <bool B, typename T = void>
using
enable_if_t
=
typename
std
::
enable_if
<
B
,
T
>::
type
;
}
// namespace ck
#endif
include/ck/utility/functional.hpp
View file @
b79df771
#ifndef CK_FUNCTIONAL_HPP
#define CK_FUNCTIONAL_HPP
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "integral_constant.hpp"
#include "type.hpp"
#pragma once
#include "ck/utility/integral_constant.hpp"
#include "ck/utility/type.hpp"
namespace
ck
{
...
...
@@ -113,4 +115,3 @@ template <bool predicate, class X, class Y>
using
conditional_t
=
typename
conditional
<
predicate
,
X
,
Y
>::
type
;
}
// namespace ck
#endif
include/ck/utility/functional2.hpp
View file @
b79df771
#ifndef CK_FUNCTIONAL2_HPP
#define CK_FUNCTIONAL2_HPP
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "functional.hpp"
#include "sequence.hpp"
#pragma once
#include "ck/utility/functional.hpp"
#include "ck/utility/sequence.hpp"
namespace
ck
{
...
...
@@ -45,4 +47,3 @@ struct static_for
};
}
// namespace ck
#endif
include/ck/utility/functional3.hpp
View file @
b79df771
#ifndef CK_FUNCTIONAL3_HPP
#define CK_FUNCTIONAL3_HPP
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "functional.hpp"
#include "functional2.hpp"
#include "sequence.hpp"
#include "multi_index.hpp"
#pragma once
#include "ck/ck.hpp"
#include "ck/utility/functional.hpp"
#include "ck/utility/functional2.hpp"
#include "ck/utility/sequence.hpp"
#include "ck/utility/multi_index.hpp"
namespace
ck
{
...
...
@@ -139,4 +142,3 @@ struct ford
};
}
// namespace ck
#endif
include/ck/utility/functional4.hpp
View file @
b79df771
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_FUNCTIONAL4_HPP
#define CK_FUNCTIONAL4_HPP
...
...
include/ck/utility/generic_memory_space_atomic.hpp
View file @
b79df771
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "data_type.hpp"
...
...
include/ck/utility/get_id.hpp
View file @
b79df771
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "config.hpp"
#include "ck/ck.hpp"
#ifndef CK_NOGPU
namespace
ck
{
...
...
include/ck/utility/ignore.hpp
View file @
b79df771
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#ifndef CK_IGNORE_HPP
#define CK_IGNORE_HPP
...
...
include/ck/utility/inner_product.hpp
View file @
b79df771
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "data_type.hpp"
#ifndef CK_NOGPU
...
...
include/ck/utility/integral_constant.hpp
View file @
b79df771
#ifndef CK_INTEGRAL_CONSTANT_HPP
#define CK_INTEGRAL_CONSTANT_HPP
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
namespace
ck
{
...
...
@@ -47,4 +49,3 @@ __host__ __device__ constexpr auto operator%(integral_constant<TX, X>, integral_
}
}
// namespace ck
#endif
include/ck/utility/is_known_at_compile_time.hpp
View file @
b79df771
#ifndef IS_KNOWN_AT_COMPILE_TIME_HPP
#define IS_KNOWN_AT_COMPILE_TIME_HPP
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "config.hpp"
#pragma once
#include "ck/ck.hpp"
#include "integral_constant.hpp"
#include "sequence.hpp"
#include "tuple.hpp"
...
...
@@ -52,4 +54,3 @@ struct is_known_at_compile_time<Tuple<Ts...>>
};
}
// namespace ck
#endif
include/ck/utility/magic_division.hpp
View file @
b79df771
#ifndef CK_MAGIC_DIVISION_HPP
#define CK_MAGIC_DIVISION_HPP
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "config.hpp"
#pragma once
#include "ck/ck.hpp"
#include "integral_constant.hpp"
#include "number.hpp"
#include "type.hpp"
...
...
@@ -158,5 +160,3 @@ struct MagicDivision
};
}
// namespace ck
#endif
include/ck/utility/math.hpp
View file @
b79df771
#ifndef CK_MATH_HPP
#define CK_MATH_HPP
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#include "config.hpp"
#pragma once
#include "ck/ck.hpp"
#include "integral_constant.hpp"
#include "number.hpp"
#include "type.hpp"
...
...
@@ -142,6 +144,24 @@ __host__ __device__ constexpr auto min(X x, Ys... ys)
return
min
(
x
,
min
(
ys
...));
}
// disallow implicit type casting
template
<
typename
T
>
__device__
T
exp
(
T
x
);
// TODO: add f16 support using v_exp_f16
template
<
>
__device__
float
exp
<
float
>
(
float
x
)
{
return
__expf
(
x
);
}
template
<
>
__device__
double
exp
<
double
>
(
double
x
)
{
return
exp
(
x
);
}
// greatest common divisor, aka highest common factor
__host__
__device__
constexpr
index_t
gcd
(
index_t
x
,
index_t
y
)
{
...
...
@@ -212,5 +232,3 @@ struct less
}
// namespace math
}
// namespace ck
#endif
include/ck/utility/math_v2.hpp
View file @
b79df771
#ifndef CK_MATH_V2_HPP
#define CK_MATH_V2_HPP
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include <cmath>
#include "data_type.hpp"
#include "type.hpp"
#include "ck/utility/data_type.hpp"
#include "ck/utility/type.hpp"
namespace
ck
{
namespace
math
{
...
...
@@ -112,5 +115,3 @@ static inline __device__ double sqrt(double x) { return ::sqrt(x); };
}
// namespace math
}
// namespace ck
#endif
include/ck/utility/multi_index.hpp
View file @
b79df771
#ifndef CK_MULTI_INDEX_HPP
#define CK_MULTI_INDEX_HPP
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-2022, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
#include "common_header.hpp"
...
...
@@ -8,5 +10,3 @@
#else
#include "statically_indexed_array_multi_index.hpp"
#endif
#endif
Prev
1
…
9
10
11
12
13
14
15
16
17
…
23
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment