"vscode:/vscode.git/clone" did not exist on "d9eb9358ccf8803253d2f5cf7feafef13b60b8c5"
Unverified Commit f9d72381 authored by Lianmin Zheng's avatar Lianmin Zheng Committed by GitHub
Browse files

Tweak mem fraction (#20)

parent bf51ddc6
......@@ -278,7 +278,7 @@ class ModelRunner:
load_format=self.load_format,
revision=None,
)
self.model = model
self.model = model.eval()
def profile_max_num_token(self, total_gpu_memory):
available_gpu_memory = get_available_gpu_memory(
......
......@@ -26,10 +26,14 @@ class ServerArgs:
if self.tokenizer_path is None:
self.tokenizer_path = self.model_path
if self.mem_fraction_static is None:
if self.tp_size > 1:
self.mem_fraction_static = 0.8
if self.tp_size >= 8:
self.mem_fraction_static = 0.80
elif self.tp_size >= 4:
self.mem_fraction_static = 0.82
elif self.tp_size >= 2:
self.mem_fraction_static = 0.85
else:
self.mem_fraction_static = 0.9
self.mem_fraction_static = 0.90
@staticmethod
def add_cli_args(parser: argparse.ArgumentParser):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment