Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
tsoc
superbenchmark
Commits
3419447c
Unverified
Commit
3419447c
authored
Jan 29, 2022
by
Yifan Xiong
Committed by
GitHub
Jan 29, 2022
Browse files
Benchmarks - Support T4 and A10 in GEMM benchmark (#294)
Support T4 and A10 in GEMM benchmark.
parent
3524975c
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
8 additions
and
5 deletions
+8
-5
superbench/benchmarks/micro_benchmarks/cuda_gemm_flops_performance.py
...enchmarks/micro_benchmarks/cuda_gemm_flops_performance.py
+6
-3
tests/benchmarks/micro_benchmarks/test_cuda_gemm_flops_performance.py
...arks/micro_benchmarks/test_cuda_gemm_flops_performance.py
+1
-1
third_party/Makefile
third_party/Makefile
+1
-1
No files found.
superbench/benchmarks/micro_benchmarks/cuda_gemm_flops_performance.py
View file @
3419447c
...
...
@@ -24,7 +24,7 @@ def __init__(self, name, parameters=''):
self
.
_bin_name
=
'cutlass_profiler'
# TODO - To support more architectures, currently only support compute capability = 7.0
and 8.0
# TODO - To support more architectures, currently only support compute capability = 7.0
, 7.5, 8.0, 8.6
self
.
__kernel_map
=
{
7.0
:
{
'fp64'
:
'cutlass_simt_dgemm_128x128_8x2_*'
,
...
...
@@ -44,6 +44,9 @@ def __init__(self, name, parameters=''):
'int4_tc'
:
'cutlass_tensorop_s4_i16864gemm_s4_256x128_128x3_*'
,
}
}
# Skip FP64 for RTX Turing/Ampere and Tesla T4/GA10x due to very limited FP64 TFLOP rate
self
.
__kernel_map
[
7.5
]
=
{
k
:
self
.
__kernel_map
[
7.0
][
k
]
for
k
in
self
.
__kernel_map
[
7.0
]
if
'fp64'
not
in
k
}
self
.
__kernel_map
[
8.6
]
=
{
k
:
self
.
__kernel_map
[
8.0
][
k
]
for
k
in
self
.
__kernel_map
[
8.0
]
if
'fp64'
not
in
k
}
self
.
__parse_logline
=
[
'gemm,cutlass_simt_dgemm_128x128_8x2'
,
'gemm,cutlass_simt_sgemm_128x128_8x2'
,
'gemm,cutlass_simt_hgemm_256x128_8x2'
,
'gemm,cutlass_tensorop_d884gemm_128x128_16x3'
,
...
...
@@ -70,8 +73,8 @@ def _preprocess(self):
super
().
_preprocess
()
self
.
_result
.
set_return_code
(
ReturnCode
.
MICROBENCHMARK_UNSUPPORTED_ARCHITECTURE
)
logger
.
error
(
'Unsupported architecture - benchmark: {}, compute capability: {},
expected: 7.0 or 8.0
'
.
format
(
self
.
_name
,
capability
'Unsupported architecture - benchmark: {}, compute capability: {},
supports {}
'
.
format
(
self
.
_name
,
capability
,
' '
.
join
(
sorted
([
str
(
k
)
for
k
in
self
.
__kernel_map
]))
)
)
return
False
...
...
tests/benchmarks/micro_benchmarks/test_cuda_gemm_flops_performance.py
View file @
3419447c
...
...
@@ -35,7 +35,7 @@ def test_flops_performance_cuda(self):
)
ret
=
benchmark
.
_preprocess
()
if
dm
.
device_manager
.
get_device_compute_capability
()
not
in
[
7.0
,
8.0
]
:
if
dm
.
device_manager
.
get_device_compute_capability
()
not
in
benchmark
.
_CudaGemmFlopsBenchmark__kernel_map
:
assert
(
ret
is
False
)
assert
(
benchmark
.
return_code
==
ReturnCode
.
MICROBENCHMARK_UNSUPPORTED_ARCHITECTURE
)
else
:
...
...
third_party/Makefile
View file @
3419447c
...
...
@@ -27,7 +27,7 @@ sb_micro_path:
cuda_cutlass
:
ifneq
(,$(wildcard cutlass/CMakeLists.txt))
cmake
-DCMAKE_INSTALL_BINDIR
=
$(SB_MICRO_PATH)
/bin
-DCMAKE_INSTALL_LIBDIR
=
$(SB_MICRO_PATH)
/lib
-DCMAKE_BUILD_TYPE
=
Release
\
-DCUTLASS_NVCC_ARCHS
=
'70;
80
'
-DCUTLASS_ENABLE_EXAMPLES
=
OFF
-DCUTLASS_ENABLE_TESTS
=
OFF
-S
./cutlass
-B
./cutlass/build
-DCUTLASS_NVCC_ARCHS
=
'70;
75;80;86
'
-DCUTLASS_ENABLE_EXAMPLES
=
OFF
-DCUTLASS_ENABLE_TESTS
=
OFF
-S
./cutlass
-B
./cutlass/build
cmake
--build
./cutlass/build
-j
8
--target
install
endif
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment