# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""A module containing all the micro-benchmarks."""

from superbench.benchmarks.micro_benchmarks.micro_base import MicroBenchmark, MicroBenchmarkWithInvoke
from superbench.benchmarks.micro_benchmarks.gemm_flops_performance_base import GemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.memory_bw_performance_base import MemBwBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_hpcg_performance_base import GpuHpcgBenchmark

from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap
from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark
from superbench.benchmarks.micro_benchmarks.blaslt_function_base import BlasLtBaseBenchmark
from superbench.benchmarks.micro_benchmarks.cublaslt_function import CublasLtBenchmark
from superbench.benchmarks.micro_benchmarks.rocm_hipblaslt_function import RocmHipBlasLtBenchmark
from superbench.benchmarks.micro_benchmarks.dtk_hipblaslt_function import DtkHipBlasLtBenchmark
from superbench.benchmarks.micro_benchmarks.dtk_memory_bw_performance import DtkMemBwBenchmark
from superbench.benchmarks.micro_benchmarks.dtk_gemm_flops_performance import DtkGemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.dtk_hpcg_performance import DtkHpcgBenchmark
from superbench.benchmarks.micro_benchmarks.cuda_gemm_flops_performance import CudaGemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import CudaMemBwBenchmark
from superbench.benchmarks.micro_benchmarks.cuda_nccl_bw_performance import CudaNcclBwBenchmark
from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark
from superbench.benchmarks.micro_benchmarks.disk_performance import DiskBenchmark
from superbench.benchmarks.micro_benchmarks.dist_inference import DistInference
from superbench.benchmarks.micro_benchmarks.cpu_memory_bw_latency_performance import CpuMemBwLatencyBenchmark
from superbench.benchmarks.micro_benchmarks.cpu_stream_performance import CpuStreamBenchmark
from superbench.benchmarks.micro_benchmarks.cpu_hpl_performance import CpuHplBenchmark
from superbench.benchmarks.micro_benchmarks.gpcnet_performance import GPCNetBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_copy_bw_performance import GpuCopyBwBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_stream import GpuStreamBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_burn_test import GpuBurnBenchmark
from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopbackBenchmark
from superbench.benchmarks.micro_benchmarks.ib_validation_performance import IBBenchmark
from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch
from superbench.benchmarks.micro_benchmarks.ort_inference_performance import ORTInferenceBenchmark
from superbench.benchmarks.micro_benchmarks.rocm_gemm_flops_performance import RocmGemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.rocm_memory_bw_performance import RocmMemBwBenchmark
from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
from superbench.benchmarks.micro_benchmarks.tcp_connectivity import TCPConnectivityBenchmark
from superbench.benchmarks.micro_benchmarks.tensorrt_inference_performance import TensorRTInferenceBenchmark
from superbench.benchmarks.micro_benchmarks.directx_gpu_encoding_latency import DirectXGPUEncodingLatency
from superbench.benchmarks.micro_benchmarks.directx_gpu_copy_performance import DirectXGPUCopyBw
from superbench.benchmarks.micro_benchmarks.directx_mem_bw_performance import DirectXGPUMemBw
from superbench.benchmarks.micro_benchmarks.directx_gemm_flops_performance import DirectXGPUCoreFlops
from superbench.benchmarks.micro_benchmarks.nvbandwidth import NvBandwidthBenchmark

# Public API of this package: every class imported above is re-exported here,
# and nothing else. Keep this list in sync when adding or removing an import.
__all__ = [
    'BlasLtBaseBenchmark',
    'ComputationCommunicationOverlap',
    'CpuMemBwLatencyBenchmark',
    'CpuHplBenchmark',
    'CpuStreamBenchmark',
    'CublasBenchmark',
    'CublasLtBenchmark',
    'CudaGemmFlopsBenchmark',
    'CudaMemBwBenchmark',
    'CudaNcclBwBenchmark',
    'CudnnBenchmark',
    'DiskBenchmark',
    'DistInference',
    'DtkGemmFlopsBenchmark',
    'RocmHipBlasLtBenchmark',
    'DtkHipBlasLtBenchmark',
    'DtkMemBwBenchmark',
    'GPCNetBenchmark',
    'GemmFlopsBenchmark',
    'GpuBurnBenchmark',
    'GpuCopyBwBenchmark',
    'GpuHpcgBenchmark',
    'GpuStreamBenchmark',
    'IBBenchmark',
    'IBLoopbackBenchmark',
    'KernelLaunch',
    'MemBwBenchmark',
    'MicroBenchmark',
    'MicroBenchmarkWithInvoke',
    'ORTInferenceBenchmark',
    'RocmGemmFlopsBenchmark',
    'RocmMemBwBenchmark',
    'ShardingMatmul',
    'TCPConnectivityBenchmark',
    'TensorRTInferenceBenchmark',
    'DirectXGPUEncodingLatency',
    'DirectXGPUCopyBw',
    'DirectXGPUMemBw',
    'DirectXGPUCoreFlops',
    'DtkHpcgBenchmark',
    'NvBandwidthBenchmark',
]