"setup.py" did not exist on "a75c0a47cf5521977aa2f68f43320d8a219b8bdc"
inference_request.py 901 Bytes
Newer Older
xingjinliang's avatar
xingjinliang committed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
from dataclasses import dataclass
from enum import Enum
from typing import List, Optional

import torch

from megatron.core.inference.common_inference_params import CommonInferenceParams


# Declared with Enum class syntax; every member is given an explicit integer value.
class Status(Enum):
    """States that an inference request can be tagged with.

    The four members carry the distinct integer values 1 through 4. The
    meaning of each state is conveyed by its name only; the code that moves
    a request from one state to another is not part of this module.
    """

    # Explicit values (rather than auto-generated ones) keep the numeric
    # value of each member independent of declaration order.
    WAITING_IN_QUEUE = 1
    ACTIVE_AND_GENERATING_TOKENS = 2
    ACTIVE_BUT_NOT_GENERATING_TOKENS = 3
    COMPLETED = 4


@dataclass
class InferenceRequest:
    """Record holding the data associated with one inference request.

    The first six fields have no default and must be supplied when the
    record is constructed. The remaining fields default to ``None`` (or to
    ``0`` for ``generated_length``); the ``None``-defaulted ones are typed
    ``Optional`` so the annotations agree with their defaults.

    Attributes:
        request_id: Identifier of the request.
        prompt: Prompt text of the request.
        inference_parameters: ``CommonInferenceParams`` instance attached to
            the request.
        prompt_tokens: The prompt as a list of ints (presumably the token ids
            obtained by tokenizing ``prompt`` -- confirm against the caller).
        arrival_time: Float timestamp for the request (clock source and units
            are not visible in this module).
        status: Current ``Status`` of the request.
        encoder_prompt: Optional encoder prompt text; ``None`` unless set.
        generated_text: Optional generated text; ``None`` unless set.
        generated_tokens: Optional tensor of generated tokens; ``None`` unless
            set (shape and dtype are not fixed by this module).
        generated_log_probs: Optional tensor of log probabilities for the
            generated output; ``None`` unless set (shape and dtype are not
            fixed by this module).
        generated_length: Integer length of the generated output; starts at 0.
    """

    # Required fields (no defaults).
    request_id: str
    prompt: str
    inference_parameters: CommonInferenceParams
    prompt_tokens: List[int]
    arrival_time: float
    status: Status

    # Fields that default to None are Optional: PEP 484 deprecates treating
    # `x: T = None` as an implicit Optional, and strict type checkers reject it.
    encoder_prompt: Optional[str] = None
    generated_text: Optional[str] = None
    generated_tokens: Optional[torch.Tensor] = None
    generated_log_probs: Optional[torch.Tensor] = None
    generated_length: int = 0