Unverified commit cdcbde5f, authored by Liangsheng Yin, committed by GitHub
Browse files

Code structure refactor (#807)

parent 21e22b9e
...@@ -44,7 +44,7 @@ class ServerArgs: ...@@ -44,7 +44,7 @@ class ServerArgs:
max_prefill_tokens: Optional[int] = None max_prefill_tokens: Optional[int] = None
max_running_requests: Optional[int] = None max_running_requests: Optional[int] = None
max_num_reqs: Optional[int] = None max_num_reqs: Optional[int] = None
schedule_heuristic: str = "lpm" schedule_policy: str = "lpm"
schedule_conservativeness: float = 1.0 schedule_conservativeness: float = 1.0
# Other runtime options # Other runtime options
...@@ -231,11 +231,11 @@ class ServerArgs: ...@@ -231,11 +231,11 @@ class ServerArgs:
help="The maximum number of requests to serve in the memory pool. If the model have a large context length, you may need to decrease this value to avoid out-of-memory errors.", help="The maximum number of requests to serve in the memory pool. If the model have a large context length, you may need to decrease this value to avoid out-of-memory errors.",
) )
parser.add_argument( parser.add_argument(
"--schedule-heuristic", "--schedule-policy",
type=str, type=str,
default=ServerArgs.schedule_heuristic, default=ServerArgs.schedule_policy,
choices=["lpm", "random", "fcfs", "dfs-weight"], choices=["lpm", "random", "fcfs", "dfs-weight"],
help="The scheduling heuristic.", help="The scheduling policy of the requests.",
) )
parser.add_argument( parser.add_argument(
"--schedule-conservativeness", "--schedule-conservativeness",
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment