Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
gaoqiong
composable_kernel
Commits
ed4912f2
Commit
ed4912f2
authored
Jun 16, 2023
by
rocking
Browse files
Calculate gridSize according to the number of CU.
Remove useless header
parent
38962b98
Changes
2
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
12 additions
and
15 deletions
+12
-15
include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp
...sor_operation/gpu/device/impl/device_put_element_impl.hpp
+11
-11
library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp
.../reference_tensor_operation/cpu/reference_maxpool_bwd.hpp
+1
-4
No files found.
include/ck/tensor_operation/gpu/device/impl/device_put_element_impl.hpp
View file @
ed4912f2
...
@@ -8,11 +8,11 @@
...
@@ -8,11 +8,11 @@
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_description/tensor_descriptor_helper.hpp"
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "ck/tensor_operation/gpu/device/device_put_element.hpp"
#include "ck/tensor_operation/gpu/device/device_put_element.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp"
#include "ck/tensor_operation/gpu/grid/gridwise_put_element_1d.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/device_prop.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/kernel_launch.hpp"
#include "ck/host_utility/stream_utility.hpp"
namespace
ck
{
namespace
ck
{
namespace
tensor_operation
{
namespace
tensor_operation
{
...
@@ -70,27 +70,28 @@ struct DevicePutElementImpl
...
@@ -70,27 +70,28 @@ struct DevicePutElementImpl
:
p_input_
{
p_input
},
:
p_input_
{
p_input
},
p_indices_
{
p_indices
},
p_indices_
{
p_indices
},
p_output_
{
p_output
},
p_output_
{
p_output
},
input_length_raw_
{
input_length
},
elementwise_op_
{
elementwise_op
},
elementwise_op_
{
elementwise_op
},
blockSize_
{
256
},
blockSize_
{
256
}
gridSize_
{
104
}
// FIXME - Calculate the grid size by number of CU in the future
{
{
in_grid_desc_
=
MakeDescriptor_M
(
input_length
,
gridSize_
,
blockSize_
);
}
}
const
InDataType
*
p_input_
;
const
InDataType
*
p_input_
;
const
IndexDataType
*
p_indices_
;
const
IndexDataType
*
p_indices_
;
OutDataType
*
p_output_
;
OutDataType
*
p_output_
;
index_t
input_length_raw_
;
ElementwiseOperation
elementwise_op_
;
ElementwiseOperation
elementwise_op_
;
index_t
blockSize_
;
index_t
blockSize_
;
index_t
gridSize_
;
InGrid1dDesc
in_grid_desc_
;
};
};
struct
Invoker
:
public
BaseInvoker
struct
Invoker
:
public
BaseInvoker
{
{
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
float
Run
(
const
Argument
&
arg
,
const
StreamConfig
&
stream_config
=
StreamConfig
{})
{
{
index_t
gridSize
=
getAvailableComputeUnitCount
(
stream_config
);
InGrid1dDesc
in_grid_desc
=
MakeDescriptor_M
(
arg
.
input_length_raw_
,
gridSize
,
arg
.
blockSize_
);
const
auto
kernel
=
kernel_put_element_1d
<
GridwisePutElement
,
const
auto
kernel
=
kernel_put_element_1d
<
GridwisePutElement
,
InGrid1dDesc
,
InGrid1dDesc
,
InDataType
,
InDataType
,
...
@@ -100,10 +101,10 @@ struct DevicePutElementImpl
...
@@ -100,10 +101,10 @@ struct DevicePutElementImpl
float
elapsed_time
=
launch_and_time_kernel
(
stream_config
,
float
elapsed_time
=
launch_and_time_kernel
(
stream_config
,
kernel
,
kernel
,
dim3
(
arg
.
gridSize
_
),
dim3
(
gridSize
),
dim3
(
arg
.
blockSize_
),
dim3
(
arg
.
blockSize_
),
0
,
0
,
arg
.
in_grid_desc
_
,
in_grid_desc
,
arg
.
p_input_
,
arg
.
p_input_
,
arg
.
p_indices_
,
arg
.
p_indices_
,
arg
.
p_output_
,
arg
.
p_output_
,
...
@@ -121,9 +122,8 @@ struct DevicePutElementImpl
...
@@ -121,9 +122,8 @@ struct DevicePutElementImpl
bool
IsSupportedArgument
(
const
BaseArgument
*
p_arg
)
override
bool
IsSupportedArgument
(
const
BaseArgument
*
p_arg
)
override
{
{
const
Argument
*
pArg
=
dynamic_cast
<
const
Argument
*>
(
p_arg
);
const
Argument
*
pArg
=
dynamic_cast
<
const
Argument
*>
(
p_arg
);
index_t
input_length
=
pArg
->
in_grid_desc_
.
GetTransforms
()[
I0
].
GetUpperLengths
()[
I0
];
if
(
input_length
%
InVectorSize
!=
0
)
if
(
pArg
->
input_length
_raw_
%
InVectorSize
!=
0
)
{
{
return
false
;
return
false
;
}
}
...
...
library/include/ck/library/reference_tensor_operation/cpu/reference_maxpool_bwd.hpp
View file @
ed4912f2
...
@@ -6,11 +6,8 @@
...
@@ -6,11 +6,8 @@
#include <iostream>
#include <iostream>
#include <sstream>
#include <sstream>
#include <vector>
#include <vector>
#include <algorithm>
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/tensor_operation/gpu/device/device_base.hpp"
#include "ck/tensor_operation/gpu/device/reduction_operator_mapping.hpp"
#include "ck/utility/reduction_functions_accumulate.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
#include "ck/library/utility/host_tensor_generator.hpp"
...
@@ -50,7 +47,7 @@ struct ReferenceMaxPoolBwd : public device::BaseOperator
...
@@ -50,7 +47,7 @@ struct ReferenceMaxPoolBwd : public device::BaseOperator
{
{
int
din_length
=
arg
.
din_
.
GetElementSpaceSize
();
int
din_length
=
arg
.
din_
.
GetElementSpaceSize
();
int
dout_length
=
arg
.
dout_
.
GetElementSpaceSize
();
int
dout_length
=
arg
.
dout_
.
GetElementSpaceSize
();
std
::
vector
<
ConputeDataType
>
buf
(
din_length
);
std
::
vector
<
ConputeDataType
>
buf
(
din_length
,
0
);
for
(
int
i
=
0
;
i
<
dout_length
;
++
i
)
for
(
int
i
=
0
;
i
<
dout_length
;
++
i
)
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment