# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""A module containing all the micro-benchmarks."""

# Names from the shared ``micro_base`` / ``*_base`` modules.
from superbench.benchmarks.micro_benchmarks.micro_base import MicroBenchmark, MicroBenchmarkWithInvoke
from superbench.benchmarks.micro_benchmarks.gemm_flops_performance_base import GemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.memory_bw_performance_base import MemBwBenchmark

# Names from the individual benchmark modules, re-exported at package level.
from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap
from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark
from superbench.benchmarks.micro_benchmarks.cublaslt_function import CublasLtBenchmark
from superbench.benchmarks.micro_benchmarks.cuda_gemm_flops_performance import CudaGemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import CudaMemBwBenchmark
from superbench.benchmarks.micro_benchmarks.cuda_nccl_bw_performance import CudaNcclBwBenchmark
from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark
from superbench.benchmarks.micro_benchmarks.disk_performance import DiskBenchmark
from superbench.benchmarks.micro_benchmarks.cpu_memory_bw_latency_performance import CpuMemBwLatencyBenchmark
from superbench.benchmarks.micro_benchmarks.gpcnet_performance import GPCNetBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_copy_bw_performance import GpuCopyBwBenchmark
from superbench.benchmarks.micro_benchmarks.gpu_burn_test import GpuBurnBenchmark
from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopbackBenchmark
from superbench.benchmarks.micro_benchmarks.ib_validation_performance import IBBenchmark
from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch
from superbench.benchmarks.micro_benchmarks.ort_inference_performance import ORTInferenceBenchmark
from superbench.benchmarks.micro_benchmarks.rocm_gemm_flops_performance import RocmGemmFlopsBenchmark
from superbench.benchmarks.micro_benchmarks.rocm_memory_bw_performance import RocmMemBwBenchmark
from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul
from superbench.benchmarks.micro_benchmarks.tcp_connectivity import TCPConnectivityBenchmark
from superbench.benchmarks.micro_benchmarks.tensorrt_inference_performance import TensorRTInferenceBenchmark

# Public names of this package. Every entry corresponds to a name imported
# above; the list is kept in ASCII (case-sensitive) sorted order.
__all__ = [
    'ComputationCommunicationOverlap',
    'CpuMemBwLatencyBenchmark',
    'CublasBenchmark',
    'CublasLtBenchmark',
    'CudaGemmFlopsBenchmark',
    'CudaMemBwBenchmark',
    'CudaNcclBwBenchmark',
    'CudnnBenchmark',
    'DiskBenchmark',
    'GPCNetBenchmark',
    'GemmFlopsBenchmark',
    'GpuBurnBenchmark',
    'GpuCopyBwBenchmark',
    'IBBenchmark',
    'IBLoopbackBenchmark',
    'KernelLaunch',
    'MemBwBenchmark',
    'MicroBenchmark',
    'MicroBenchmarkWithInvoke',
    'ORTInferenceBenchmark',
    'RocmGemmFlopsBenchmark',
    'RocmMemBwBenchmark',
    'ShardingMatmul',
    'TCPConnectivityBenchmark',
    'TensorRTInferenceBenchmark',
]