Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
7d3c5ea4
Unverified
Commit
7d3c5ea4
authored
Jun 25, 2023
by
zjing14
Committed by
GitHub
Jun 25, 2023
Browse files
Merge branch 'develop' into lwpck-537
parents
981b8549
3b18f1e3
Changes
1000
Hide whitespace changes
Inline
Side-by-side
Showing
20 changed files
with
76 additions
and
53 deletions
+76
-53
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp
...uce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp
...uce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp
...uce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp
...e/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp
...uce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp
...uce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp
...e/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp
...uce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp
...ce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp
...uce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp
...uce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp
...uce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp
...e/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp
...educe/device_reduce_instance_threadwise_i8_i32_i8_add.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp
...educe/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp
...educe/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp
...reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp
...reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp
...k/library/tensor_operation_instance/gpu/reduce/reduce.hpp
+1
-1
library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp
...lude/ck/library/tensor_operation_instance/gpu/softmax.hpp
+57
-34
No files found.
Too many changes to show.
To preserve performance only
1000 of 1000+
files are displayed.
Plain diff
Email patch
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_avg.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_max.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_min.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f32_f32_norm2.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_add.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_avg.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f32_f64_f32_norm2.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_add.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_amax.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_avg.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_max.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_min.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_f64_f64_f64_norm2.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_add.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i32_i8_avg.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_amax.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_max.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/device_reduce_instance_threadwise_i8_i8_i8_min.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/reduce/reduce.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
library/include/ck/library/tensor_operation_instance/gpu/softmax.hpp
View file @
7d3c5ea4
// SPDX-License-Identifier: MIT
// Copyright (c) 2018-202
2
, Advanced Micro Devices, Inc. All rights reserved.
// Copyright (c) 2018-202
3
, Advanced Micro Devices, Inc. All rights reserved.
#pragma once
...
...
@@ -9,34 +9,33 @@
#include "ck/ck.hpp"
#include "ck/library/tensor_operation_instance/device_operation_instance_factory.hpp"
#include "ck/tensor_operation/gpu/device/device_softmax.hpp"
#include "ck/library/tensor_operation_instance/gpu/softmax/device_softmax_instance.hpp"
namespace
ck
{
namespace
tensor_operation
{
namespace
device
{
namespace
instance
{
void
add_device_softmax_f16_f16_rank3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
3
>>&
);
void
add_device_softmax_f16_f16_rank4_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F16
,
F32
,
F16
,
PassThrough
,
PassThrough
,
4
>>&
);
void
add_device_softmax_f32_f32_rank3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
3
>>&
);
void
add_device_softmax_f32_f32_rank4_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
F32
,
F32
,
F32
,
PassThrough
,
PassThrough
,
4
>>&
);
void
add_device_softmax_i8_i8_rank3_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
3
>>&
);
void
add_device_softmax_i8_i8_rank4_instances
(
std
::
vector
<
DeviceSoftmaxPtr
<
I8
,
F32
,
I8
,
PassThrough
,
PassThrough
,
4
>>&
);
template
<
typename
InDataType
,
typename
AccDataType
,
typename
OutDataType
,
index_t
Rank
>
struct
DeviceOperationInstanceFactory
<
ck
::
tensor_operation
::
device
::
DeviceSoftmax
<
InDataType
,
AccDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
Rank
>>
template
<
typename
InDataType
,
typename
AccDataType
,
typename
OutDataType
,
index_t
Rank
,
index_t
NumReduceDim
>
struct
DeviceOperationInstanceFactory
<
ck
::
tensor_operation
::
device
::
DeviceSoftmax
<
InDataType
,
AccDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
Rank
,
NumReduceDim
>>
{
using
DeviceOp
=
DeviceSoftmax
<
InDataType
,
AccDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
Rank
>
;
using
DeviceOp
=
DeviceSoftmax
<
InDataType
,
AccDataType
,
OutDataType
,
PassThrough
,
PassThrough
,
Rank
,
NumReduceDim
>
;
static
auto
GetInstances
()
{
...
...
@@ -46,25 +45,49 @@ struct DeviceOperationInstanceFactory<
std
::
is_same_v
<
OutDataType
,
F16
>
)
{
if
constexpr
(
Rank
==
3
)
add_device_softmax_f16_f16_rank3_instances
(
op_ptrs
);
{
if
constexpr
(
NumReduceDim
==
1
)
add_device_softmax_f16_f16_rank3_reduce1_instances
(
op_ptrs
);
else
if
constexpr
(
NumReduceDim
==
2
)
add_device_softmax_f16_f16_rank3_reduce2_instances
(
op_ptrs
);
else
if
constexpr
(
NumReduceDim
==
3
)
add_device_softmax_f16_f16_rank3_reduce3_instances
(
op_ptrs
);
}
else
if
constexpr
(
Rank
==
4
)
add_device_softmax_f16_f16_rank4_instances
(
op_ptrs
);
{
if
constexpr
(
NumReduceDim
==
1
)
add_device_softmax_f16_f16_rank4_reduce1_instances
(
op_ptrs
);
else
if
constexpr
(
NumReduceDim
==
2
)
add_device_softmax_f16_f16_rank4_reduce2_instances
(
op_ptrs
);
else
if
constexpr
(
NumReduceDim
==
3
)
add_device_softmax_f16_f16_rank4_reduce3_instances
(
op_ptrs
);
else
if
constexpr
(
NumReduceDim
==
4
)
add_device_softmax_f16_f16_rank4_reduce4_instances
(
op_ptrs
);
}
}
else
if
constexpr
(
std
::
is_same_v
<
InDataType
,
F32
>
&&
std
::
is_same_v
<
AccDataType
,
F32
>
&&
std
::
is_same_v
<
OutDataType
,
F32
>
)
{
if
constexpr
(
Rank
==
3
)
add_device_softmax_f32_f32_rank3_instances
(
op_ptrs
);
else
if
constexpr
(
Rank
==
4
)
add_device_softmax_f32_f32_rank4_instances
(
op_ptrs
);
}
else
if
constexpr
(
std
::
is_same_v
<
InDataType
,
I8
>
&&
std
::
is_same_v
<
AccDataType
,
F32
>
&&
std
::
is_same_v
<
OutDataType
,
I8
>
)
{
if
constexpr
(
Rank
==
3
)
add_device_softmax_i8_i8_rank3_instances
(
op_ptrs
);
{
if
constexpr
(
NumReduceDim
==
1
)
add_device_softmax_f32_f32_rank3_reduce1_instances
(
op_ptrs
);
else
if
constexpr
(
NumReduceDim
==
2
)
add_device_softmax_f32_f32_rank3_reduce2_instances
(
op_ptrs
);
else
if
constexpr
(
NumReduceDim
==
3
)
add_device_softmax_f32_f32_rank3_reduce3_instances
(
op_ptrs
);
}
else
if
constexpr
(
Rank
==
4
)
add_device_softmax_i8_i8_rank4_instances
(
op_ptrs
);
{
if
constexpr
(
NumReduceDim
==
1
)
add_device_softmax_f32_f32_rank4_reduce1_instances
(
op_ptrs
);
else
if
constexpr
(
NumReduceDim
==
2
)
add_device_softmax_f32_f32_rank4_reduce2_instances
(
op_ptrs
);
else
if
constexpr
(
NumReduceDim
==
3
)
add_device_softmax_f32_f32_rank4_reduce3_instances
(
op_ptrs
);
else
if
constexpr
(
NumReduceDim
==
4
)
add_device_softmax_f32_f32_rank4_reduce4_instances
(
op_ptrs
);
}
}
return
op_ptrs
;
...
...
Prev
1
…
34
35
36
37
38
39
40
41
42
…
50
Next
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment