__init__.py 4.74 KB
Newer Older
1
2
3
4
5
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""A module containing all the micro-benchmarks."""

6
from superbench.benchmarks.micro_benchmarks.micro_base import MicroBenchmark, MicroBenchmarkWithInvoke
7
8
from superbench.benchmarks.micro_benchmarks.gemm_flops_performance_base import GemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.memory_bw_performance_base import MemBwBenchmark
one's avatar
one committed
9
from superbench.benchmarks.micro_benchmarks.gpu_hpcg_performance_base import GpuHpcgBenchmark
10

11
from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap
12
from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark
13
from superbench.benchmarks.micro_benchmarks.blaslt_function_base import BlasLtBaseBenchmark
14
from superbench.benchmarks.micro_benchmarks.cublaslt_function import CublasLtBenchmark
15
16
from superbench.benchmarks.micro_benchmarks.rocm_hipblaslt_function import RocmHipBlasLtBenchmark
from superbench.benchmarks.micro_benchmarks.dtk_hipblaslt_function import DtkHipBlasLtBenchmark
one's avatar
one committed
17
from superbench.benchmarks.micro_benchmarks.dtk_gemm_flops_performance import DtkGemmFlopsBenchmark
one's avatar
one committed
18
from superbench.benchmarks.micro_benchmarks.dtk_hpcg_performance import DtkHpcgBenchmark
19
from superbench.benchmarks.micro_benchmarks.cuda_gemm_flops_performance import CudaGemmFlopsBenchmark
20
from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import CudaMemBwBenchmark
21
22
from superbench.benchmarks.micro_benchmarks.cuda_nccl_bw_performance import CudaNcclBwBenchmark
from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark
23
from superbench.benchmarks.micro_benchmarks.disk_performance import DiskBenchmark
24
from superbench.benchmarks.micro_benchmarks.dist_inference import DistInference
25
from superbench.benchmarks.micro_benchmarks.cpu_memory_bw_latency_performance import CpuMemBwLatencyBenchmark
rafsalas19's avatar
rafsalas19 committed
26
from superbench.benchmarks.micro_benchmarks.cpu_stream_performance import CpuStreamBenchmark
rafsalas19's avatar
rafsalas19 committed
27
from superbench.benchmarks.micro_benchmarks.cpu_hpl_performance import CpuHplBenchmark
28
29
from superbench.benchmarks.micro_benchmarks.gpcnet_performance import GPCNetBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_copy_bw_performance import GpuCopyBwBenchmark
30
from superbench.benchmarks.micro_benchmarks.gpu_stream import GpuStreamBenchmark
31
from superbench.benchmarks.micro_benchmarks.gpu_burn_test import GpuBurnBenchmark
32
from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopbackBenchmark
33
from superbench.benchmarks.micro_benchmarks.ib_validation_performance import IBBenchmark
34
from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch
35
from superbench.benchmarks.micro_benchmarks.ort_inference_performance import ORTInferenceBenchmark
36
37
38
from superbench.benchmarks.micro_benchmarks.rocm_gemm_flops_performance import RocmGemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.rocm_memory_bw_performance import RocmMemBwBenchmark
from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
39
from superbench.benchmarks.micro_benchmarks.tcp_connectivity import TCPConnectivityBenchmark
40
from superbench.benchmarks.micro_benchmarks.tensorrt_inference_performance import TensorRTInferenceBenchmark
41
from superbench.benchmarks.micro_benchmarks.directx_gpu_encoding_latency import DirectXGPUEncodingLatency
42
from superbench.benchmarks.micro_benchmarks.directx_gpu_copy_performance import DirectXGPUCopyBw
43
from superbench.benchmarks.micro_benchmarks.directx_mem_bw_performance import DirectXGPUMemBw
44
from superbench.benchmarks.micro_benchmarks.directx_gemm_flops_performance import DirectXGPUCoreFlops
45
from superbench.benchmarks.micro_benchmarks.nvbandwidth import NvBandwidthBenchmark
46

47
# Public names exported by a star-import of this package.
# Each entry must match a class imported at the top of this file; keep the
# two lists in sync when adding or removing a benchmark.
__all__ = [
    'BlasLtBaseBenchmark',
    'ComputationCommunicationOverlap',
    'CpuMemBwLatencyBenchmark',
    'CpuHplBenchmark',
    'CpuStreamBenchmark',
    'CublasBenchmark',
    'CublasLtBenchmark',
    'CudaGemmFlopsBenchmark',
    'CudaMemBwBenchmark',
    'CudaNcclBwBenchmark',
    'CudnnBenchmark',
    'DiskBenchmark',
    'DistInference',
    'DtkGemmFlopsBenchmark',
    'RocmHipBlasLtBenchmark',
    'DtkHipBlasLtBenchmark',
    'GPCNetBenchmark',
    'GemmFlopsBenchmark',
    'GpuBurnBenchmark',
    'GpuCopyBwBenchmark',
    'GpuHpcgBenchmark',
    'GpuStreamBenchmark',
    'IBBenchmark',
    'IBLoopbackBenchmark',
    'KernelLaunch',
    'MemBwBenchmark',
    'MicroBenchmark',
    'MicroBenchmarkWithInvoke',
    'ORTInferenceBenchmark',
    'RocmGemmFlopsBenchmark',
    'RocmMemBwBenchmark',
    'ShardingMatmul',
    'TCPConnectivityBenchmark',
    'TensorRTInferenceBenchmark',
    'DirectXGPUEncodingLatency',
    'DirectXGPUCopyBw',
    'DirectXGPUMemBw',
    'DirectXGPUCoreFlops',
    'DtkHpcgBenchmark',
    'NvBandwidthBenchmark',
]