"vscode:/vscode.git/clone" did not exist on "4cb53ecd0cffceb6dee5c011a58f65997a86f151"
patch.py 1.4 KB
Newer Older
1
import os
2
import weakref
3

4
5
6
7
import nest_asyncio

nest_asyncio.apply()

8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
from sglang.utils import execute_shell_command, reserve_port

# Limits used by this patch: patched_post_init falls back to them when the
# corresponding ServerArgs field is None, and launch_server_cmd passes them as
# --max-running-requests / --max-total-tokens on the server command line.
DEFAULT_MAX_RUNNING_REQUESTS = 200
DEFAULT_MAX_TOTAL_TOKENS = 20480

import sglang.srt.server_args as server_args_mod

# Keep a reference to the unpatched hook so patched_post_init can delegate to
# it; this must be captured before ServerArgs.__post_init__ is reassigned.
_original_post_init = server_args_mod.ServerArgs.__post_init__


def patched_post_init(self):
    """Run the original ``__post_init__`` hook, then apply this module's defaults.

    ``max_running_requests`` and ``max_total_tokens`` are filled in only when
    they are still ``None`` after the original hook has run;
    ``disable_cuda_graph`` is set to ``True`` unconditionally.
    """
    _original_post_init(self)

    fallbacks = (
        ("max_running_requests", DEFAULT_MAX_RUNNING_REQUESTS),
        ("max_total_tokens", DEFAULT_MAX_TOTAL_TOKENS),
    )
    for field_name, fallback in fallbacks:
        # Explicitly provided values are left untouched.
        if getattr(self, field_name) is None:
            setattr(self, field_name, fallback)

    # Always forced on, regardless of the value the instance arrived with.
    self.disable_cuda_graph = True


# Install the patch. This is a module-level side effect: it takes effect as
# soon as this module is imported.
server_args_mod.ServerArgs.__post_init__ = patched_post_init

29
30
# Maps a launched process object to the socket returned by reserve_port().
# Keys are held weakly, so an entry (and with it this module's reference to
# the socket) goes away once the process object is garbage collected.
# NOTE(review): presumably the socket is what keeps the port reserved --
# confirm against sglang.utils.reserve_port.
process_socket_map = weakref.WeakKeyDictionary()

31
32

def launch_server_cmd(command: str, host: str = "0.0.0.0", port: int = None):
    """
    Launch the server using the given command.
    If no port is specified, a free port is reserved.

    The command is extended with ``--port <port>`` followed by
    ``--max-running-requests``, ``--max-total-tokens`` (taken from the
    module-level defaults) and ``--disable-cuda-graph`` before it is handed
    to ``execute_shell_command``.

    Args:
        command: Shell command that starts the server, without a port flag.
        host: Passed to ``reserve_port`` when a port has to be reserved;
            not used when ``port`` is given.
        port: Port to launch on, or ``None`` to reserve one.

    Returns:
        A ``(process, port)`` tuple, where ``process`` is whatever
        ``execute_shell_command`` returned and ``port`` is the port that was
        put on the command line.

    NOTE(review): the extra flags are appended after the caller's command, so
    if ``command`` already contains one of them the appended copy likely takes
    precedence (last occurrence wins in argparse-style parsing) -- confirm
    that overriding caller-supplied values is intended.
    """
    lock_socket = None
    if port is None:
        port, lock_socket = reserve_port(host)

    extra_flags = (
        f"--max-running-requests {DEFAULT_MAX_RUNNING_REQUESTS} "
        f"--max-total-tokens {DEFAULT_MAX_TOTAL_TOKENS} "
        "--disable-cuda-graph"
    )
    full_command = f"{command} --port {port} {extra_flags}"
    process = execute_shell_command(full_command)

    # Tie the reservation socket's lifetime to the process object: the map
    # holds the socket only while the process object itself is alive.
    if lock_socket is not None:
        process_socket_map[process] = lock_socket

    return process, port