# global_config.py (1.2 KB) -- blame view: Lianmin Zheng committed (lines 1-14)
"""Global configurations"""


class GlobalConfig:
    """Plain container of global settings.

    Every setting is an ordinary instance attribute assigned a default value
    in ``__init__``; the constructor takes no arguments. Callers change a
    setting by assigning to the attribute on an instance.
    """

    def __init__(self):
        # Verbosity level
        # 0: do not output anything
        # 2: output final text after every run
        self.verbosity = 0

        # No backend is selected by default.
        self.default_backend = None

        # Output configs
        self.skip_special_tokens_in_output = True
        self.spaces_between_special_tokens_in_out = True

        # Optimization configs
        self.eager_fill_image = False
        self.enable_precache_with_tracing = True
        self.enable_parallel_encoding = True
        self.enable_parallel_decoding = True

        # Choices: ["no_adjust", "adjust_cache"]
        # no_adjust: Do not adjust the position embedding of KV cache.
        # adjust_cache: Adjust the position embedding of KV cache.
        self.concate_and_append_mode = "no_adjust"

        # Request dependency time due to network delay
        # NOTE(review): both delays are presumably in seconds -- confirm
        # against the code that reads them.
        self.request_dependency_delay = 0.02
        self.wait_for_new_request_delay = 0.0006

        # New generation token ratio estimation
        self.base_new_token_ratio = 0.4
        self.base_min_new_token_ratio = 0.2
        self.new_token_ratio_decay = 0.0001
        self.new_token_ratio_recovery = 0.05

# blame view: Lianmin Zheng committed (lines 38-39)

# Module-level GlobalConfig instance, created with default values when this
# module is imported.
global_config = GlobalConfig()