# Copyright (c) Microsoft Corporation. # Licensed under the MIT License. """A module containing all the micro-benchmarks.""" from superbench.benchmarks.micro_benchmarks.micro_base import MicroBenchmark, MicroBenchmarkWithInvoke from superbench.benchmarks.micro_benchmarks.gemm_flops_performance_base import GemmFlopsBenchmark from superbench.benchmarks.micro_benchmarks.memory_bw_performance_base import MemBwBenchmark from superbench.benchmarks.micro_benchmarks.gpu_hpcg_performance_base import GpuHpcgBenchmark from superbench.benchmarks.micro_benchmarks.computation_communication_overlap import ComputationCommunicationOverlap from superbench.benchmarks.micro_benchmarks.cublas_function import CublasBenchmark from superbench.benchmarks.micro_benchmarks.blaslt_function_base import BlasLtBaseBenchmark from superbench.benchmarks.micro_benchmarks.cublaslt_function import CublasLtBenchmark from superbench.benchmarks.micro_benchmarks.rocm_hipblaslt_function import RocmHipBlasLtBenchmark from superbench.benchmarks.micro_benchmarks.dtk_hipblaslt_function import DtkHipBlasLtBenchmark from superbench.benchmarks.micro_benchmarks.dtk_gemm_flops_performance import DtkGemmFlopsBenchmark from superbench.benchmarks.micro_benchmarks.dtk_hpcg_performance import DtkHpcgBenchmark from superbench.benchmarks.micro_benchmarks.cuda_gemm_flops_performance import CudaGemmFlopsBenchmark from superbench.benchmarks.micro_benchmarks.cuda_memory_bw_performance import CudaMemBwBenchmark from superbench.benchmarks.micro_benchmarks.cuda_nccl_bw_performance import CudaNcclBwBenchmark from superbench.benchmarks.micro_benchmarks.cudnn_function import CudnnBenchmark from superbench.benchmarks.micro_benchmarks.disk_performance import DiskBenchmark from superbench.benchmarks.micro_benchmarks.dist_inference import DistInference from superbench.benchmarks.micro_benchmarks.cpu_memory_bw_latency_performance import CpuMemBwLatencyBenchmark from superbench.benchmarks.micro_benchmarks.cpu_stream_performance import CpuStreamBenchmark from superbench.benchmarks.micro_benchmarks.cpu_hpl_performance import CpuHplBenchmark from superbench.benchmarks.micro_benchmarks.gpcnet_performance import GPCNetBenchmark from superbench.benchmarks.micro_benchmarks.gpu_copy_bw_performance import GpuCopyBwBenchmark from superbench.benchmarks.micro_benchmarks.gpu_stream import GpuStreamBenchmark from superbench.benchmarks.micro_benchmarks.gpu_burn_test import GpuBurnBenchmark from superbench.benchmarks.micro_benchmarks.ib_loopback_performance import IBLoopbackBenchmark from superbench.benchmarks.micro_benchmarks.ib_validation_performance import IBBenchmark from superbench.benchmarks.micro_benchmarks.kernel_launch_overhead import KernelLaunch from superbench.benchmarks.micro_benchmarks.ort_inference_performance import ORTInferenceBenchmark from superbench.benchmarks.micro_benchmarks.rocm_gemm_flops_performance import RocmGemmFlopsBenchmark from superbench.benchmarks.micro_benchmarks.rocm_memory_bw_performance import RocmMemBwBenchmark from superbench.benchmarks.micro_benchmarks.sharding_matmul import ShardingMatmul from superbench.benchmarks.micro_benchmarks.tcp_connectivity import TCPConnectivityBenchmark from superbench.benchmarks.micro_benchmarks.tensorrt_inference_performance import TensorRTInferenceBenchmark from superbench.benchmarks.micro_benchmarks.directx_gpu_encoding_latency import DirectXGPUEncodingLatency from superbench.benchmarks.micro_benchmarks.directx_gpu_copy_performance import DirectXGPUCopyBw from superbench.benchmarks.micro_benchmarks.directx_mem_bw_performance import DirectXGPUMemBw from superbench.benchmarks.micro_benchmarks.directx_gemm_flops_performance import DirectXGPUCoreFlops from superbench.benchmarks.micro_benchmarks.nvbandwidth import NvBandwidthBenchmark __all__ = [ 'BlasLtBaseBenchmark', 'ComputationCommunicationOverlap', 'CpuMemBwLatencyBenchmark', 'CpuHplBenchmark', 'CpuStreamBenchmark', 'CublasBenchmark', 'CublasLtBenchmark', 'CudaGemmFlopsBenchmark', 'CudaMemBwBenchmark', 'CudaNcclBwBenchmark', 'CudnnBenchmark', 'DiskBenchmark', 'DistInference', 'DtkGemmFlopsBenchmark', 'RocmHipBlasLtBenchmark', 'DtkHipBlasLtBenchmark', 'GPCNetBenchmark', 'GemmFlopsBenchmark', 'GpuBurnBenchmark', 'GpuCopyBwBenchmark', 'GpuHpcgBenchmark', 'GpuStreamBenchmark', 'IBBenchmark', 'IBLoopbackBenchmark', 'KernelLaunch', 'MemBwBenchmark', 'MicroBenchmark', 'MicroBenchmarkWithInvoke', 'ORTInferenceBenchmark', 'RocmGemmFlopsBenchmark', 'RocmMemBwBenchmark', 'ShardingMatmul', 'TCPConnectivityBenchmark', 'TensorRTInferenceBenchmark', 'DirectXGPUEncodingLatency', 'DirectXGPUCopyBw', 'DirectXGPUMemBw', 'DirectXGPUCoreFlops', 'DtkHpcgBenchmark', 'NvBandwidthBenchmark', ]