Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
yangql
composable_kernel-1
Commits
237d4ca0
Unverified
Commit
237d4ca0
authored
Nov 30, 2021
by
Chao Liu
Committed by
GitHub
Nov 30, 2021
Browse files
added test for magic number division (#58)
parent
567f5e9c
Changes
3
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
162 additions
and
0 deletions
+162
-0
CMakeLists.txt
CMakeLists.txt
+1
-0
test/CMakeLists.txt
test/CMakeLists.txt
+18
-0
test/magic_number_division/main.cpp
test/magic_number_division/main.cpp
+143
-0
No files found.
CMakeLists.txt
View file @
237d4ca0
...
@@ -200,3 +200,4 @@ enable_cppcheck(
...
@@ -200,3 +200,4 @@ enable_cppcheck(
add_subdirectory
(
host
)
add_subdirectory
(
host
)
add_subdirectory
(
example
)
add_subdirectory
(
example
)
add_subdirectory
(
profiler
)
add_subdirectory
(
profiler
)
add_subdirectory
(
test
)
test/CMakeLists.txt
0 → 100644
View file @
237d4ca0
include_directories
(
BEFORE
include
${
PROJECT_SOURCE_DIR
}
/host/host_tensor/include
${
PROJECT_SOURCE_DIR
}
/host/device/include
${
PROJECT_SOURCE_DIR
}
/device_operation/include
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include/utility
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include/tensor_description
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include/tensor_operation
${
PROJECT_SOURCE_DIR
}
/composable_kernel/include/problem_transform
${
PROJECT_SOURCE_DIR
}
/external/rocm/include
)
set
(
MAGIC_NUMBER_DIVISISON_SOURCE magic_number_division/main.cpp
)
add_executable
(
test_magic_number_division
${
MAGIC_NUMBER_DIVISISON_SOURCE
}
)
target_link_libraries
(
test_magic_number_division PRIVATE host_tensor
)
test/magic_number_division/main.cpp
0 → 100644
View file @
237d4ca0
#include <iostream>
#include <numeric>
#include <initializer_list>
#include <cstdlib>
#include <stdlib.h>
#include <half.hpp>
#include "config.hpp"
#include "print.hpp"
#include "device.hpp"
#include "host_tensor.hpp"
#include "host_tensor_generator.hpp"
#include "device_tensor.hpp"
__global__
void
gpu_magic_number_division
(
uint32_t
magic_multiplier
,
uint32_t
magic_shift
,
const
int32_t
*
p_dividend
,
int32_t
*
p_result
,
uint64_t
num
)
{
uint64_t
global_thread_num
=
blockDim
.
x
*
gridDim
.
x
;
uint64_t
global_thread_id
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
for
(
uint64_t
data_id
=
global_thread_id
;
data_id
<
num
;
data_id
+=
global_thread_num
)
{
p_result
[
data_id
]
=
ck
::
MagicDivision
::
DoMagicDivision
(
p_dividend
[
data_id
],
magic_multiplier
,
magic_shift
);
}
}
__global__
void
gpu_naive_division
(
int32_t
divisor
,
const
int32_t
*
p_dividend
,
int32_t
*
p_result
,
uint64_t
num
)
{
uint64_t
global_thread_num
=
blockDim
.
x
*
gridDim
.
x
;
uint64_t
global_thread_id
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
for
(
uint64_t
data_id
=
global_thread_id
;
data_id
<
num
;
data_id
+=
global_thread_num
)
{
p_result
[
data_id
]
=
p_dividend
[
data_id
]
/
divisor
;
}
}
template
<
typename
T
>
T
check_error
(
const
std
::
vector
<
T
>&
ref
,
const
std
::
vector
<
T
>&
result
)
{
T
error
=
0
;
T
max_diff
=
0
;
T
ref_value
=
0
,
result_value
=
0
;
for
(
std
::
size_t
i
=
0
;
i
<
ref
.
size
();
++
i
)
{
T
diff
=
std
::
abs
(
ref
[
i
]
-
result
[
i
]);
error
+=
diff
;
if
(
max_diff
<
diff
)
{
max_diff
=
diff
;
ref_value
=
ref
[
i
];
result_value
=
result
[
i
];
}
}
return
max_diff
;
}
int
main
(
int
,
char
*
[])
{
uint64_t
num_divisor
=
4096
;
uint64_t
num_dividend
=
1L
<<
16
;
std
::
vector
<
int32_t
>
divisors_host
(
num_divisor
);
std
::
vector
<
int32_t
>
dividends_host
(
num_dividend
);
// generate divisor
for
(
uint64_t
i
=
0
;
i
<
num_divisor
;
++
i
)
{
divisors_host
[
i
]
=
i
+
1
;
}
// generate dividend
for
(
uint64_t
i
=
0
;
i
<
num_divisor
;
++
i
)
{
dividends_host
[
i
]
=
i
;
}
DeviceMem
dividends_dev_buf
(
sizeof
(
int32_t
)
*
num_dividend
);
DeviceMem
naive_result_dev_buf
(
sizeof
(
int32_t
)
*
num_dividend
);
DeviceMem
magic_result_dev_buf
(
sizeof
(
int32_t
)
*
num_dividend
);
std
::
vector
<
int32_t
>
naive_result_host
(
num_dividend
);
std
::
vector
<
int32_t
>
magic_result_host
(
num_dividend
);
dividends_dev_buf
.
ToDevice
(
dividends_host
.
data
());
bool
pass
=
true
;
for
(
std
::
size_t
i
=
0
;
i
<
num_divisor
;
++
i
)
{
// run naive division on GPU
gpu_naive_division
<<<
1024
,
256
>>>
(
divisors_host
[
i
],
static_cast
<
const
int32_t
*>
(
dividends_dev_buf
.
GetDeviceBuffer
()),
static_cast
<
int32_t
*>
(
naive_result_dev_buf
.
GetDeviceBuffer
()),
num_dividend
);
// calculate magic number
uint32_t
magic_multiplier
,
magic_shift
;
ck
::
tie
(
magic_multiplier
,
magic_shift
)
=
ck
::
MagicDivision
::
CalculateMagicNumbers
(
divisors_host
[
i
]);
// run magic division on GPU
gpu_magic_number_division
<<<
1024
,
256
>>>
(
magic_multiplier
,
magic_shift
,
static_cast
<
const
int32_t
*>
(
dividends_dev_buf
.
GetDeviceBuffer
()),
static_cast
<
int32_t
*>
(
magic_result_dev_buf
.
GetDeviceBuffer
()),
num_dividend
);
naive_result_dev_buf
.
FromDevice
(
naive_result_host
.
data
());
magic_result_dev_buf
.
FromDevice
(
magic_result_host
.
data
());
int32_t
max_diff
=
check_error
(
naive_result_host
,
magic_result_host
);
if
(
max_diff
!=
0
)
{
pass
=
false
;
continue
;
}
}
if
(
pass
)
{
std
::
cout
<<
"test magic number division: Pass"
<<
std
::
endl
;
}
else
{
std
::
cout
<<
"test magic number division: Fail"
<<
std
::
endl
;
}
return
1
;
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment