import argparse
import configparser
import json
from typing import Iterable, List

import requests


def get_streaming_response(response: requests.Response) -> Iterable[List[str]]:
    """Yield the ``"text"`` field of each NUL-delimited JSON chunk of a streamed response."""
    for chunk in response.iter_lines(chunk_size=1024,
                                     decode_unicode=False,
                                     delimiter=b"\0"):
        if chunk:  # skip empty keep-alive chunks between delimiters
            data = json.loads(chunk.decode("utf-8"))
            yield data["text"]


def get_response(response: requests.Response) -> List[str]:
    """Return the ``"text"`` field of a non-streamed JSON response body."""
    data = json.loads(response.content.decode("utf-8"))
    return data["text"]


def clear_line(n: int = 1) -> None:
    """Move the cursor up and erase ``n`` previously printed terminal lines."""
    line_up = '\033[1A'     # ANSI escape: cursor up one line
    line_clear = '\x1b[2K'  # ANSI escape: erase entire line
    for _ in range(n):
        print(line_up, end=line_clear, flush=True)


def main() -> None:
    """Send ``--query`` to the local inference server and print beam candidates.

    Reads the ``[llm] stream_chat`` flag from ``--config_path`` to decide
    between the streaming and non-streaming endpoint.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--query', default='请写一首诗')
    parser.add_argument('--use_hf', action='store_true')
    parser.add_argument(
        '--config_path',
        default='../config.ini',
        help='config目录')
    args = parser.parse_args()
    print(args.query)

    headers = {"Content-Type": "application/json"}
    json_str = json.dumps({"query": args.query, "history": []})

    config = configparser.ConfigParser()
    config.read(args.config_path)
    stream_chat = config.getboolean('llm', 'stream_chat')

    # Endpoint selection. NOTE(review): when both are set, stream_chat silently
    # overrides --use_hf (the HF endpoint is never used in streaming mode) —
    # confirm this precedence is intended.
    func = 'vllm_inference'
    if args.use_hf:
        func = 'hf_inference'
    if stream_chat:
        func = 'vllm_inference_stream'
    api_url = f"http://localhost:8888/{func}"

    # Single request for both modes (the original duplicated this call in each
    # branch). NOTE(security): verify=False disables TLS certificate checks;
    # tolerable only because the target is localhost.
    response = requests.get(api_url,
                            headers=headers,
                            data=json_str.encode("utf-8"),
                            verify=False,
                            stream=stream_chat)

    if stream_chat:
        num_printed_lines = 0
        for h in get_streaming_response(response):
            # Each streamed chunk carries the full text so far: erase the
            # previously printed candidates and reprint in place.
            clear_line(num_printed_lines)
            num_printed_lines = 0
            for i, line in enumerate(h):
                num_printed_lines += 1
                print(f"Beam candidate {i}: {line!r}", flush=True)
    else:
        output = get_response(response)
        for i, line in enumerate(output):
            print(f"Beam candidate {i}: {line!r}", flush=True)


if __name__ == "__main__":
    main()