"vscode:/vscode.git/clone" did not exist on "fb25d5391143a0fd4cbce862f19472ddc2a1ecab"
Unverified commit e304cf15, authored by Yuting Jiang and committed by GitHub
Browse files

Benchmarks: Micro benchmarks - add support for NVIDIA L4/L40/L40s GPUs in gemm-flops (#634)

**Description**
Add support for GPU architecture 8.9 (NVIDIA L4/L40/L40s GPUs) in gemm-flops.
parent 4e27142a
...@@ -33,6 +33,6 @@ if(NOT DEFINED NVCC_ARCHS_SUPPORTED) ...@@ -33,6 +33,6 @@ if(NOT DEFINED NVCC_ARCHS_SUPPORTED)
list(APPEND NVCC_ARCHS_SUPPORTED 86) list(APPEND NVCC_ARCHS_SUPPORTED 86)
endif() endif()
if (NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.8) if (NOT CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.8)
list(APPEND NVCC_ARCHS_SUPPORTED 90) list(APPEND NVCC_ARCHS_SUPPORTED 89 90)
endif() endif()
endif() endif()
...@@ -47,6 +47,8 @@ def __init__(self, name, parameters=''): ...@@ -47,6 +47,8 @@ def __init__(self, name, parameters=''):
# Skip FP64 for RTX Turing/Ampere and Tesla T4/GA10x due to very limited FP64 TFLOP rate # Skip FP64 for RTX Turing/Ampere and Tesla T4/GA10x due to very limited FP64 TFLOP rate
self.__kernel_map[7.5] = {k: self.__kernel_map[7.0][k] for k in self.__kernel_map[7.0] if 'fp64' not in k} self.__kernel_map[7.5] = {k: self.__kernel_map[7.0][k] for k in self.__kernel_map[7.0] if 'fp64' not in k}
self.__kernel_map[8.6] = {k: self.__kernel_map[8.0][k] for k in self.__kernel_map[8.0] if 'fp64' not in k} self.__kernel_map[8.6] = {k: self.__kernel_map[8.0][k] for k in self.__kernel_map[8.0] if 'fp64' not in k}
# Skip FP64 for Ada Lovelace L4/L40 due to no FP64 Tensor Cores and a very limited FP64 CUDA-core TFLOP rate
self.__kernel_map[8.9] = {k: self.__kernel_map[8.0][k] for k in self.__kernel_map[8.0] if 'fp64' not in k}
# Skip INT4 for Hopper due to no native CUDA/Tensor Cores # Skip INT4 for Hopper due to no native CUDA/Tensor Cores
self.__kernel_map[9.0] = {k: self.__kernel_map[8.0][k] for k in self.__kernel_map[8.0] if 'int4_tc' not in k} self.__kernel_map[9.0] = {k: self.__kernel_map[8.0][k] for k in self.__kernel_map[8.0] if 'int4_tc' not in k}
self.__parse_logline = [ self.__parse_logline = [
......
...@@ -33,7 +33,7 @@ sb_micro_path: ...@@ -33,7 +33,7 @@ sb_micro_path:
# Build cutlass. # Build cutlass.
cuda_cutlass: cuda_cutlass:
ifeq ($(shell echo $(CUDA_VER)">=11.8" | bc -l), 1) ifeq ($(shell echo $(CUDA_VER)">=11.8" | bc -l), 1)
$(eval ARCHS := "70;75;80;86;90") $(eval ARCHS := "70;75;80;86;89;90")
else else
$(eval ARCHS := "70;75;80;86") $(eval ARCHS := "70;75;80;86")
endif endif
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment