# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""A module containing all the micro-benchmarks."""

from superbench.benchmarks.micro_benchmarks.micro_base import MicroBenchmark, MicroBenchmarkWithInvoke
from superbench.benchmarks.micro_benchmarks.gemm_flops_performance_base import GemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.memory_bw_performance_base import MemBwBenchmark

from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap
from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark
from superbench.benchmarks.micro_benchmarks.blaslt_function_base import BlasLtBaseBenchmark
from superbench.benchmarks.micro_benchmarks.cublaslt_function import CublasLtBenchmark
from superbench.benchmarks.micro_benchmarks.hipblaslt_function import HipBlasLtBenchmark
from superbench.benchmarks.micro_benchmarks.cuda_gemm_flops_performance import CudaGemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import CudaMemBwBenchmark
from superbench.benchmarks.micro_benchmarks.cuda_nccl_bw_performance import CudaNcclBwBenchmark
from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark
from superbench.benchmarks.micro_benchmarks.disk_performance import DiskBenchmark
from superbench.benchmarks.micro_benchmarks.dist_inference import DistInference
from superbench.benchmarks.micro_benchmarks.cpu_memory_bw_latency_performance import CpuMemBwLatencyBenchmark
from superbench.benchmarks.micro_benchmarks.cpu_stream_performance import CpuStreamBenchmark
from superbench.benchmarks.micro_benchmarks.cpu_hpl_performance import CpuHplBenchmark
from superbench.benchmarks.micro_benchmarks.gpcnet_performance import GPCNetBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_copy_bw_performance import GpuCopyBwBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_stream import GpuStreamBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_burn_test import GpuBurnBenchmark
from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopbackBenchmark
from superbench.benchmarks.micro_benchmarks.ib_validation_performance import IBBenchmark
from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch
from superbench.benchmarks.micro_benchmarks.ort_inference_performance import ORTInferenceBenchmark
from superbench.benchmarks.micro_benchmarks.rocm_gemm_flops_performance import RocmGemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.rocm_memory_bw_performance import RocmMemBwBenchmark
from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
from superbench.benchmarks.micro_benchmarks.tcp_connectivity import TCPConnectivityBenchmark
from superbench.benchmarks.micro_benchmarks.tensorrt_inference_performance import TensorRTInferenceBenchmark
from superbench.benchmarks.micro_benchmarks.directx_gpu_encoding_latency import DirectXGPUEncodingLatency
from superbench.benchmarks.micro_benchmarks.directx_gpu_copy_performance import DirectXGPUCopyBw
from superbench.benchmarks.micro_benchmarks.directx_mem_bw_performance import DirectXGPUMemBw
from superbench.benchmarks.micro_benchmarks.directx_gemm_flops_performance import DirectXGPUCoreFlops
from superbench.benchmarks.micro_benchmarks.nvbandwidth import NvBandwidthBenchmark

# Public names of this package: every class imported above is re-exported here,
# so each new import must also be added to this list.
__all__ = [
    'BlasLtBaseBenchmark',
    'ComputationCommunicationOverlap',
    'CpuMemBwLatencyBenchmark',
    'CpuHplBenchmark',
    'CpuStreamBenchmark',
    'CublasBenchmark',
    'CublasLtBenchmark',
    'CudaGemmFlopsBenchmark',
    'CudaMemBwBenchmark',
    'CudaNcclBwBenchmark',
    'CudnnBenchmark',
    'DiskBenchmark',
    'DistInference',
    'HipBlasLtBenchmark',
    'GPCNetBenchmark',
    'GemmFlopsBenchmark',
    'GpuBurnBenchmark',
    'GpuCopyBwBenchmark',
    'GpuStreamBenchmark',
    'IBBenchmark',
    'IBLoopbackBenchmark',
    'KernelLaunch',
    'MemBwBenchmark',
    'MicroBenchmark',
    'MicroBenchmarkWithInvoke',
    'ORTInferenceBenchmark',
    'RocmGemmFlopsBenchmark',
    'RocmMemBwBenchmark',
    'ShardingMatmul',
    'TCPConnectivityBenchmark',
    'TensorRTInferenceBenchmark',
    'DirectXGPUEncodingLatency',
    'DirectXGPUCopyBw',
    'DirectXGPUMemBw',
    'DirectXGPUCoreFlops',
    'NvBandwidthBenchmark',
]