# test_decode.py

import os

import numpy as np
import torch
from transformers import AutoTokenizer

from lmdeploy.pytorch.decode import Engine, decode_single
from lmdeploy.pytorch.model import accel_model, init_model


def _test_decode_dist(model_path, prompt):
    """Decode ``prompt`` through the ``Engine`` code path.

    Args:
        model_path: Value forwarded to ``AutoTokenizer.from_pretrained`` and
            to ``Engine``.
        prompt: Text (or list of texts) handed to the tokenizer.

    Returns:
        The result of ``Engine.decode`` for the tokenized prompt(s), returned
        unchanged.
    """
    tok = AutoTokenizer.from_pretrained(model_path)
    # Reuse the EOS token for padding and pad on the right-hand side.
    tok.pad_token_id = tok.eos_token_id
    tok.padding_side = 'right'

    # The tokenizer is called without padding or tensor conversion here;
    # ``pad=True`` is passed to ``Engine.decode`` instead.
    token_ids = tok(prompt).input_ids

    decoder = Engine(model_path, tokenizer=tok)
    return decoder.decode(token_ids, sort=False, max_bs=1, pad=True)


def _test_decode_single(model_path, prompt):
    """Decode ``prompt`` through the single-process ``decode_single`` path.

    The model and tokenizer come from ``init_model``; the model is then
    passed through ``accel_model`` and switched to eval mode. Prompts are
    tokenized as one right-padded batch and moved to the current CUDA device
    before being handed to ``decode_single``.

    Args:
        model_path: Value forwarded to ``init_model``.
        prompt: Text (or list of texts) handed to the tokenizer.

    Returns:
        numpy.ndarray: The output of ``decode_single`` converted to a NumPy
        array on the host.
    """
    model, tokenizer = init_model(model_path)
    model = accel_model(model)
    model = model.eval()

    # Reuse the EOS token for padding and pad on the right-hand side.
    tokenizer.pad_token_id = tokenizer.eos_token_id
    tokenizer.padding_side = 'right'

    inputs = tokenizer(prompt, return_tensors='pt', padding=True)
    input_ids = inputs.input_ids.cuda()
    attention_mask = inputs.attention_mask.cuda()

    probs: torch.Tensor = decode_single(model, input_ids, attention_mask)

    # ``Tensor.numpy()`` only accepts CPU tensors that do not require grad.
    # The inputs live on CUDA, so do not rely on ``decode_single`` having
    # already moved/detached its result; both calls are no-ops if it has.
    return probs.detach().cpu().numpy()


def test_compare(output_outliers=True):
    """Check that the ``Engine`` and ``decode_single`` paths agree.

    The same prompts are decoded by ``_test_decode_single`` and
    ``_test_decode_dist`` and the two results are compared element-wise with
    ``np.allclose`` using ``rtol = atol = 2e-2``.

    Args:
        output_outliers: When true, print the number of elements outside the
            tolerance together with the offending values from both results
            before asserting.

    Raises:
        AssertionError: If the two results are not close within tolerance.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    # Threading-layer setting taken from the discussion at
    # https://github.com/pytorch/pytorch/issues/37377#issuecomment-629529611
    os.environ['MKL_THREADING_LAYER'] = 'GNU'

    model_path = 'llama2/huggingface/llama-2-7b'

    prompts = [
        'I believe the meaning of life is to find your gift. The purpose of life is to give it away.',  # noqa: E501
        'Simply put, the theory of relativity states that ',
        'Building a website can be done in 10 simple steps:'
    ]

    p_single = _test_decode_single(model_path, prompts)
    p_dist = _test_decode_dist(model_path, prompts)

    rtol = 2.0e-2
    atol = 2.0e-2
    if output_outliers:
        # Use the same predicate as the final ``np.allclose`` so the report
        # and the assertion cannot disagree (e.g. NaNs count as outliers).
        failed = ~np.isclose(p_dist, p_single, rtol=rtol, atol=atol)
        idx = failed.nonzero()
        # Scope the print options to this report instead of mutating
        # NumPy's global state for every test that runs afterwards.
        with np.printoptions(linewidth=150, edgeitems=5):
            print(f'Num outliers: {len(idx[0])}')
            print(p_dist[idx])
            print(p_single[idx])

    assert np.allclose(p_dist, p_single, rtol=rtol, atol=atol), (
        f'distributed and single decode results differ beyond '
        f'rtol={rtol}, atol={atol}')