"vscode:/vscode.git/clone" did not exist on "9c38758896f73caa4056d7c5b4db991eadf4f61f"
inference_api.py 5.16 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
import os
import requests
import base64
import json
import warnings

from typing import List, Optional
from huggingface_hub.utils import build_hf_headers

from text_generation import Client, AsyncClient, __version__
from text_generation.errors import NotSupportedError

INFERENCE_ENDPOINT = os.environ.get(
    "HF_INFERENCE_ENDPOINT", "https://api-inference.huggingface.co"
)

SUPPORTED_MODELS = None


def get_supported_models() -> Optional[List[str]]:
    """
    Get the list of supported text-generation models from GitHub

    A successfully retrieved list is stored in the module-level `SUPPORTED_MODELS`
    variable and returned directly by later calls. Failed lookups are not stored,
    so the next call tries again.

    Network errors and malformed payloads are treated the same way as a non-200
    response: a warning is emitted and `None` is returned, so that this
    best-effort lookup never prevents a client from being created.

    Returns:
        Optional[List[str]]: supported models list or None if unable to get the list from GitHub
    """
    global SUPPORTED_MODELS
    if SUPPORTED_MODELS is not None:
        return SUPPORTED_MODELS

    try:
        response = requests.get(
            "https://api.github.com/repos/huggingface/text-generation-inference/contents/supported_models.json",
            timeout=5,
        )
        if response.status_code == 200:
            # The "content" field of the payload is base64-encoded JSON
            file_content = response.json()["content"]
            SUPPORTED_MODELS = json.loads(
                base64.b64decode(file_content).decode("utf-8")
            )
            return SUPPORTED_MODELS
    except (requests.RequestException, KeyError, TypeError, ValueError):
        # Connection failure, timeout or unexpected payload shape:
        # fall through to the warning below instead of raising.
        pass

    warnings.warn("Could not retrieve list of supported models.")
    return None


class InferenceAPIClient(Client):
    """Client to make calls to the HuggingFace Inference API.

    Only supports a subset of the available text-generation or text2text-generation models that are served using
    text-generation-inference

    Example:

    ```python
    >>> from text_generation import InferenceAPIClient

    >>> client = InferenceAPIClient("bigscience/bloomz")
    >>> client.generate("Why is the sky blue?").generated_text
    ' Rayleigh scattering'

    >>> result = ""
    >>> for response in client.generate_stream("Why is the sky blue?"):
    ...     if not response.token.special:
    ...         result += response.token.text
    >>> result
    ' Rayleigh scattering'
    ```
    """

    def __init__(self, repo_id: str, token: Optional[str] = None, timeout: int = 10):
        """
        Init headers and API information

        Args:
            repo_id (`str`):
                Id of repository (e.g. `bigscience/bloom`).
            token (`str`, `optional`):
                The API token to use as HTTP bearer authorization. This is not
                the authentication token. You can find the token in
                https://huggingface.co/settings/token. Alternatively, you can
                find both your organizations and personal API tokens using
                `HfApi().whoami(token)`.
            timeout (`int`):
                Timeout in seconds

        Raises:
            NotSupportedError: if the list of supported models could be
                retrieved and `repo_id` is not part of it.
        """

        # Text Generation Inference client only supports a subset of the available hub models.
        # When the list cannot be retrieved (None), the check is skipped.
        supported_models = get_supported_models()
        if supported_models is not None and repo_id not in supported_models:
            raise NotSupportedError(repo_id)

        headers = build_hf_headers(
            token=token, library_name="text-generation", library_version=__version__
        )
        base_url = f"{INFERENCE_ENDPOINT}/models/{repo_id}"

        super().__init__(base_url, headers=headers, timeout=timeout)
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151


class InferenceAPIAsyncClient(AsyncClient):
    """Asynchronous Client to make calls to the HuggingFace Inference API.

    Only supports a subset of the available text-generation or text2text-generation models that are served using
    text-generation-inference

    Example:

    ```python
    >>> from text_generation import InferenceAPIAsyncClient

    >>> client = InferenceAPIAsyncClient("bigscience/bloomz")
    >>> response = await client.generate("Why is the sky blue?")
    >>> response.generated_text
    ' Rayleigh scattering'

    >>> result = ""
    >>> async for response in client.generate_stream("Why is the sky blue?"):
    ...     if not response.token.special:
    ...         result += response.token.text
    >>> result
    ' Rayleigh scattering'
    ```
    """

    def __init__(self, repo_id: str, token: Optional[str] = None, timeout: int = 10):
        """
        Init headers and API information

        Args:
            repo_id (`str`):
                Id of repository (e.g. `bigscience/bloom`).
            token (`str`, `optional`):
                The API token to use as HTTP bearer authorization. This is not
                the authentication token. You can find the token in
                https://huggingface.co/settings/token. Alternatively, you can
                find both your organizations and personal API tokens using
                `HfApi().whoami(token)`.
            timeout (`int`):
                Timeout in seconds

        Raises:
            NotSupportedError: if the list of supported models could be
                retrieved and `repo_id` is not part of it.
        """

        # Text Generation Inference client only supports a subset of the available hub models.
        # When the list cannot be retrieved (None), the check is skipped.
        supported_models = get_supported_models()
        if supported_models is not None and repo_id not in supported_models:
            raise NotSupportedError(repo_id)

        headers = build_hf_headers(
            token=token, library_name="text-generation", library_version=__version__
        )
        base_url = f"{INFERENCE_ENDPOINT}/models/{repo_id}"

        super().__init__(base_url, headers=headers, timeout=timeout)