# test_pytorch_mixtral.py

# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.

"""Tests for mixtral model benchmarks."""

import sys

from tests.helper import decorator
from superbench.benchmarks import BenchmarkRegistry, Platform, Framework, BenchmarkType, ReturnCode

# Check for Python version 3.8 or greater and conditionally import PytorchMixtral
if sys.version_info >= (3, 8):
    from superbench.benchmarks.model_benchmarks.pytorch_mixtral import PytorchMixtral


@decorator.cuda_test
@decorator.pytorch_test
def test_pytorch_mixtral_8x7b():
    """Launch the mixtral-8x7b benchmark in float16 for train and inference and verify the outcome.

    The benchmark is built through BenchmarkRegistry on the CUDA platform with the
    PyTorch framework. After launching it, the test checks the benchmark identity,
    the parsed arguments, the dataset length and the shape of the reported metrics.
    """
    # The continuation lines are part of the string literal, so their leading
    # whitespace ends up in the parameter string passed to the registry.
    cli_parameters = '--batch_size 1 --seq_len 32 --num_warmup 1 --num_steps 2 --precision float16 \
            --hidden_size 1024 --max_position_embeddings 2048 --intermediate_size 3584 \
            --model_action train inference'

    ctx = BenchmarkRegistry.create_benchmark_context(
        'mixtral-8x7b', platform=Platform.CUDA, parameters=cli_parameters, framework=Framework.PYTORCH
    )
    assert BenchmarkRegistry.is_benchmark_context_valid(ctx)

    bench = BenchmarkRegistry.launch_benchmark(ctx)

    # Identity of the launched benchmark.
    assert bench
    assert isinstance(bench, PytorchMixtral)
    assert bench.name == 'pytorch-mixtral-8x7b'
    assert bench.type == BenchmarkType.MODEL

    parsed_args = bench._args
    expected_args = (
        # Model parameters of mixtral-8x7b (some are set explicitly in the parameter string above).
        ('hidden_size', 1024),
        ('num_hidden_layers', 32),
        ('num_attention_heads', 32),
        ('num_key_value_heads', 8),
        ('intermediate_size', 3584),
        ('max_position_embeddings', 2048),
        ('router_aux_loss_coef', 0.02),
        # Run parameters taken from the BenchmarkContext.
        ('batch_size', 1),
        ('num_classes', 100),
        ('seq_len', 32),
        ('num_warmup', 1),
        ('num_steps', 2),
    )
    for arg_name, expected_value in expected_args:
        assert getattr(parsed_args, arg_name) == expected_value

    # The dataset must hold sample_count entries per participating process.
    assert len(bench._dataset) == parsed_args.sample_count * bench._world_size

    # Overall run status.
    assert bench.run_count == 1
    assert bench.return_code == ReturnCode.SUCCESS

    # Every metric must have one entry per run, with one raw sample per measured step.
    expected_metrics = (
        'fp16_train_step_time',
        'fp16_train_throughput',
        'fp16_inference_step_time',
        'fp16_inference_throughput',
    )
    for metric_name in expected_metrics:
        raw_runs = bench.raw_data[metric_name]
        assert len(raw_runs) == bench.run_count
        assert len(raw_runs[0]) == parsed_args.num_steps
        assert len(bench.result[metric_name]) == bench.run_count