__init__.py 4.04 KB
Newer Older
1
2
3
4
5
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""A module containing all the micro-benchmarks."""

6
from superbench.benchmarks.micro_benchmarks.micro_base import MicroBenchmark, MicroBenchmarkWithInvoke
7
8
9
from superbench.benchmarks.micro_benchmarks.gemm_flops_performance_base import GemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.memory_bw_performance_base import MemBwBenchmark

10
from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap
11
from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark
12
from superbench.benchmarks.micro_benchmarks.blaslt_function_base import BlasLtBaseBenchmark
13
from superbench.benchmarks.micro_benchmarks.cublaslt_function import CublasLtBenchmark
14
from superbench.benchmarks.micro_benchmarks.hipblaslt_function import HipBlasLtBenchmark
15
from superbench.benchmarks.micro_benchmarks.cuda_gemm_flops_performance import CudaGemmFlopsBenchmark
16
from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import CudaMemBwBenchmark
17
18
from superbench.benchmarks.micro_benchmarks.cuda_nccl_bw_performance import CudaNcclBwBenchmark
from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark
19
from superbench.benchmarks.micro_benchmarks.disk_performance import DiskBenchmark
20
from superbench.benchmarks.micro_benchmarks.dist_inference import DistInference
21
from superbench.benchmarks.micro_benchmarks.cpu_memory_bw_latency_performance import CpuMemBwLatencyBenchmark
rafsalas19's avatar
rafsalas19 committed
22
from superbench.benchmarks.micro_benchmarks.cpu_stream_performance import CpuStreamBenchmark
rafsalas19's avatar
rafsalas19 committed
23
from superbench.benchmarks.micro_benchmarks.cpu_hpl_performance import CpuHplBenchmark
24
25
from superbench.benchmarks.micro_benchmarks.gpcnet_performance import GPCNetBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_copy_bw_performance import GpuCopyBwBenchmark
26
from superbench.benchmarks.micro_benchmarks.gpu_burn_test import GpuBurnBenchmark
27
from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopbackBenchmark
28
from superbench.benchmarks.micro_benchmarks.ib_validation_performance import IBBenchmark
29
from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch
30
from superbench.benchmarks.micro_benchmarks.ort_inference_performance import ORTInferenceBenchmark
31
32
33
from superbench.benchmarks.micro_benchmarks.rocm_gemm_flops_performance import RocmGemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.rocm_memory_bw_performance import RocmMemBwBenchmark
from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
34
from superbench.benchmarks.micro_benchmarks.tcp_connectivity import TCPConnectivityBenchmark
35
from superbench.benchmarks.micro_benchmarks.tensorrt_inference_performance import TensorRTInferenceBenchmark
36
from superbench.benchmarks.micro_benchmarks.directx_gpu_encoding_latency import DirectXGPUEncodingLatency
37
from superbench.benchmarks.micro_benchmarks.directx_gpu_copy_performance import DirectXGPUCopyBw
38
from superbench.benchmarks.micro_benchmarks.directx_mem_bw_performance import DirectXGPUMemBw
39
from superbench.benchmarks.micro_benchmarks.directx_gemm_flops_performance import DirectXGPUCoreFlops
40

41
# Public API of this package: the names bound by
# `from superbench.benchmarks.micro_benchmarks import *`.
# Every entry corresponds to a class imported at the top of this module;
# when adding a new micro-benchmark, add both its import and its name here.
__all__ = [
    'BlasLtBaseBenchmark',
    'ComputationCommunicationOverlap',
    'CpuMemBwLatencyBenchmark',
    'CpuHplBenchmark',
    'CpuStreamBenchmark',
    'CublasBenchmark',
    'CublasLtBenchmark',
    'CudaGemmFlopsBenchmark',
    'CudaMemBwBenchmark',
    'CudaNcclBwBenchmark',
    'CudnnBenchmark',
    'DiskBenchmark',
    'DistInference',
    'HipBlasLtBenchmark',
    'GPCNetBenchmark',
    'GemmFlopsBenchmark',
    'GpuBurnBenchmark',
    'GpuCopyBwBenchmark',
    'IBBenchmark',
    'IBLoopbackBenchmark',
    'KernelLaunch',
    'MemBwBenchmark',
    'MicroBenchmark',
    'MicroBenchmarkWithInvoke',
    'ORTInferenceBenchmark',
    'RocmGemmFlopsBenchmark',
    'RocmMemBwBenchmark',
    'ShardingMatmul',
    'TCPConnectivityBenchmark',
    'TensorRTInferenceBenchmark',
    'DirectXGPUEncodingLatency',
    'DirectXGPUCopyBw',
    'DirectXGPUMemBw',
    'DirectXGPUCoreFlops',
]