# Copyright (c) OpenMMLab. All rights reserved.
import os
import os.path as osp

import fire
import torch

from lmdeploy import turbomind as tm
from lmdeploy.turbomind.tokenizer import Tokenizer

# silence TurboMind's logging below the ERROR level
os.environ['TM_LOG_LEVEL'] = 'ERROR'


def main(model_path, inputs):
    """An example to perform model inference through the command line
    interface.

    Args:
        model_path (str): the path of the deployed model
        inputs (str): the path of a text file containing input text lines
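
    Example (illustrative invocation; <model_path> and <input_file> are
    placeholders):
        python decode.py <model_path> <input_file>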
    """
    tokenizer_model_path = osp.join(model_path, 'triton_models', 'tokenizer')
    tokenizer = Tokenizer(tokenizer_model_path)
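    # load the TurboMind engine; eos_id tells it which token ends generation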
    tm_model = tm.TurboMind(model_path, eos_id=tokenizer.eos_token_id)
    generator = tm_model.create_instance()

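    # read the prompts, one per line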
    with open(inputs, 'r') as f:
        lines = f.readlines()

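    # tokenize each prompt into a list of token ids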
    input_ids = [tokenizer.encode(x) for x in lines]

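    # decode returns per-token logits over the vocabulary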
    logits = generator.decode(input_ids)

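    # greedy prediction: the highest-scoring token id at each position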
    top_1 = torch.argmax(logits, -1)

    print(top_1)


if __name__ == '__main__':
    fire.Fire(main)