# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Tests for Computation-Communication-Overlap benchmark."""

import unittest

from tests.helper import decorator
import tests.benchmarks.utils as utils
from superbench.benchmarks import BenchmarkRegistry, Framework, BenchmarkType, ReturnCode
from superbench.benchmarks.micro_benchmarks.computation_communication_overlap \
    import ComputationCommunicationOverlap, ComputationKernelType


# TODO - replace unittest.skip('no multiple GPUs') with a skipIfNoMultiGPUs decorator
# (a hypothetical sketch appears at the end of this file).
@unittest.skip('no multiple GPUs')
@decorator.cuda_test
@decorator.pytorch_test
def test_pytorch_computation_communication_overlap_normal():
    """Test pytorch-computation-communication-overlap benchmark on the normal distributed case."""
    context = BenchmarkRegistry.create_benchmark_context(
        'computation-communication-overlap',
        parameters='--num_warmup 5 --num_steps 10 --ratio 5',
        framework=Framework.PYTORCH
    )
    world_size = 2
    assert (BenchmarkRegistry.is_benchmark_context_valid(context))
    results = utils.simulated_ddp_distributed_benchmark(context, world_size)
    for benchmark in results:
        # Check basic information.
        assert (benchmark)
        assert (isinstance(benchmark, ComputationCommunicationOverlap))
        assert (benchmark.name == 'pytorch-computation-communication-overlap')
        assert (benchmark.type == BenchmarkType.MICRO)

        # Check predefined parameters of the computation-communication-overlap benchmark.
        assert (benchmark._args.kernel == [ComputationKernelType.MUL, ComputationKernelType.MATMUL])

        # Check parameters specified in BenchmarkContext.
        assert (benchmark._args.num_steps == 10)

        # Check results and metrics.
        assert (benchmark.run_count == 1)
        assert (benchmark.return_code == ReturnCode.SUCCESS)
        assert (len(benchmark.raw_data) == benchmark.run_count * len(benchmark._args.kernel))
        assert (len(benchmark.result) == benchmark.run_count * len(benchmark._args.kernel))


@decorator.cuda_test
@decorator.pytorch_test
def test_pytorch_computation_communication_overlap_fake_distributed():
    """Test pytorch-computation-communication-overlap benchmark on a single GPU."""
    context = BenchmarkRegistry.create_benchmark_context(
        'computation-communication-overlap',
        parameters='--num_warmup 5 --num_steps 10 --ratio 5',
        framework=Framework.PYTORCH
    )
    utils.setup_simulated_ddp_distributed_env(1, 0, utils.get_free_port())
    benchmark = BenchmarkRegistry.launch_benchmark(context)

    # Check basic information.
    assert (benchmark)
    assert (isinstance(benchmark, ComputationCommunicationOverlap))
    assert (benchmark.name == 'pytorch-computation-communication-overlap')
    assert (benchmark.type == BenchmarkType.MICRO)

    # Check predefined parameters of the computation-communication-overlap benchmark.
    assert (benchmark._args.kernel == [ComputationKernelType.MUL, ComputationKernelType.MATMUL])

    # Check parameters specified in BenchmarkContext.
    assert (benchmark._args.num_steps == 10)

    # Check results and metrics.
    assert (benchmark.run_count == 1)
    assert (benchmark.return_code == ReturnCode.SUCCESS)
    assert (len(benchmark.raw_data) == benchmark.run_count * len(benchmark._args.kernel))
    assert (len(benchmark.result) == benchmark.run_count * len(benchmark._args.kernel))

    utils.clean_simulated_ddp_distributed_env()
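
# A minimal sketch of the skipIfNoMultiGPUs decorator requested in the TODO
# above, assuming torch is importable in the test environment. This helper is
# hypothetical and not part of tests.helper.decorator.
def skip_if_no_multi_gpus(func):
    """Skip the decorated test unless at least two CUDA devices are visible."""
    import torch
    return unittest.skipIf(
        not torch.cuda.is_available() or torch.cuda.device_count() < 2,
        'no multiple GPUs'
    )(func)


# Hypothetical usage: replace @unittest.skip('no multiple GPUs') above with
# @skip_if_no_multi_gpus so the test runs automatically on multi-GPU machines.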