# Copyright (c) 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# See LICENSE for license information.

import pytest

import jax
import jax.numpy as jnp

from utils import assert_allclose
from transformer_engine.jax.flax.module import _apply_low_rank_adaptation
from transformer_engine.jax.flax.module import _normalize_axes
from transformer_engine.jax.flax.transformer import LoRAScope
from transformer_engine.jax.flax.transformer import _canonicalize_lora_scope


class TestLoRA:
    """Unit tests for the low-rank adaptation (LoRA) helpers in TE/JAX."""

    @staticmethod
    def reference(x, la, lb, pattern, scale):
        """Reference LoRA output.

        Contracts ``x`` with the two low-rank factors ``la`` and ``lb`` via the
        given einsum ``pattern``, then applies the scalar ``scale``.
        Declared as a staticmethod: it takes no ``self`` and is invoked as
        ``TestLoRA.reference(...)``.
        """
        out = jnp.einsum(pattern, x, la, lb)
        return out * scale

    @pytest.mark.parametrize("shape", [(32, 1024), (32, 128, 1024)])
    @pytest.mark.parametrize("dtype", [jnp.float32, jnp.bfloat16])
    @pytest.mark.parametrize(
        "axis_features_pattern",
        [((-1,), (1024,), "...h,hr,rk->...k"), ((-1,), (3, 1024), "...h,hkr,krz->...kz")],
    )
    @pytest.mark.parametrize("rank", [32, 16])
    @pytest.mark.parametrize("alpha", [None, 4, 8])
    def test_lora(self, shape, dtype, axis_features_pattern, rank, alpha):
        """Check _apply_low_rank_adaptation against a plain einsum reference."""
        axis, features, pattern = axis_features_pattern
        # Convert possibly-negative axis indices to canonical non-negative ones.
        axis = _normalize_axes(axis, len(shape))
        shape_in_axis = tuple(shape[ax] for ax in axis)

        key = jax.random.key(1124)
        key, x_key = jax.random.split(key)
        x = jax.random.normal(x_key, shape, dtype)

        # LoRA "A" factor: contracted input axes -> rank.
        key, la_key = jax.random.split(key)
        la_shape = (*shape_in_axis, *features[:-1], rank)
        la = jax.random.normal(la_key, la_shape, dtype)

        # LoRA "B" factor: rank -> output features.
        key, lb_key = jax.random.split(key)
        lb_shape = (*features[:-1], rank, features[-1])
        lb = jax.random.normal(lb_key, lb_shape, dtype)

        out_target = _apply_low_rank_adaptation(x, axis, features, la, lb, alpha)
        # alpha=None means no extra scaling; otherwise scale the output by alpha/rank.
        scale_ref = alpha / rank if alpha is not None else 1.0
        out_ref = TestLoRA.reference(x, la, lb, pattern, scale_ref)

        assert_allclose(out_target, out_ref, dtype=dtype)

    @pytest.mark.parametrize(
        "scope_ref_assert",
        [
            ("none", LoRAScope(False, False, False), False),
            ("all", LoRAScope(True, True, True), False),
            ("qkv_proj", LoRAScope(True, False, False), False),
            ("output_proj", LoRAScope(False, True, False), False),
            ("mlp", LoRAScope(False, False, True), False),
            ("exclude_qkv_proj", LoRAScope(False, True, True), False),
            ("exclude_output_proj", LoRAScope(True, False, True), False),
            ("exclude_mlp", LoRAScope(True, True, False), False),
            ("messing_up", LoRAScope(), True),
        ],
    )
    def test_lora_scope_generator(self, scope_ref_assert):
        """Verify _canonicalize_lora_scope maps scope strings to LoRAScope flags.

        Each case is (scope_string, expected_scope, should_raise); unrecognized
        scope strings must raise an AssertionError.
        """
        scope, reference, need_assert = scope_ref_assert
        if need_assert:
            # Invalid scopes must be rejected. The previous try/except version
            # passed silently when no error was raised at all, so a regression
            # that stopped raising would have gone unnoticed.
            with pytest.raises(AssertionError):
                _canonicalize_lora_scope(scope)
        else:
            assert _canonicalize_lora_scope(scope) == reference