"megatron/data/bert_dataset.py" did not exist on "690291dd85d369fbf2495f2f3f0b3c03bd945c31"
global_config.py 1.3 KB
Newer Older
Lianmin Zheng's avatar
Lianmin Zheng committed
1
2
"""Global configurations"""

3
4
import os

Lianmin Zheng's avatar
Lianmin Zheng committed
5
6
7
8
9
10
11
12

class GlobalConfig:
    """Container for process-wide configuration values.

    All settings are plain instance attributes assigned in ``__init__``.
    Several of them read an environment variable at construction time and
    fall back to a built-in default when the variable is unset.
    """

    def __init__(self):
        # Verbosity level
        # 0: do not output anything
        # 2: output final text after every run
        self.verbosity = 0

        # Default backend of the language
        self.default_backend = None

        # Runtime constants: New generation token ratio estimation
        # Each value can be overridden through the named environment variable.
        self.default_init_new_token_ratio = float(
            os.environ.get("SGLANG_INIT_NEW_TOKEN_RATIO", 0.7)
        )
        self.default_min_new_token_ratio_factor = float(
            os.environ.get("SGLANG_MIN_NEW_TOKEN_RATIO_FACTOR", 0.14)
        )
        self.default_new_token_ratio_decay_steps = float(
            os.environ.get("SGLANG_NEW_TOKEN_RATIO_DECAY_STEPS", 600)
        )

        # Runtime constants: others
        self.retract_decode_steps = 20
        # Environment values are always strings, so convert explicitly; this
        # keeps the attribute an int whether or not the variable is set.
        self.flashinfer_workspace_size = int(
            os.environ.get("FLASHINFER_WORKSPACE_SIZE", 384 * 1024 * 1024)
        )

        # Output tokenization configs
        self.skip_special_tokens_in_output = True
        self.spaces_between_special_tokens_in_out = True

        # Interpreter optimization configs
        self.enable_precache_with_tracing = True
        self.enable_parallel_encoding = True

        self.enable_flashinfer_mla = False

43

Lianmin Zheng's avatar
Lianmin Zheng committed
44
# Module-level GlobalConfig instance, created once when this module is imported.
global_config = GlobalConfig()