Unverified commit f9d72381, authored by Lianmin Zheng, committed by GitHub
Browse files

Tweak mem fraction (#20)

parent bf51ddc6
...@@ -278,7 +278,7 @@ class ModelRunner: ...@@ -278,7 +278,7 @@ class ModelRunner:
load_format=self.load_format, load_format=self.load_format,
revision=None, revision=None,
) )
self.model = model self.model = model.eval()
def profile_max_num_token(self, total_gpu_memory): def profile_max_num_token(self, total_gpu_memory):
available_gpu_memory = get_available_gpu_memory( available_gpu_memory = get_available_gpu_memory(
......
...@@ -26,10 +26,14 @@ class ServerArgs: ...@@ -26,10 +26,14 @@ class ServerArgs:
if self.tokenizer_path is None: if self.tokenizer_path is None:
self.tokenizer_path = self.model_path self.tokenizer_path = self.model_path
if self.mem_fraction_static is None: if self.mem_fraction_static is None:
if self.tp_size > 1: if self.tp_size >= 8:
self.mem_fraction_static = 0.8 self.mem_fraction_static = 0.80
elif self.tp_size >= 4:
self.mem_fraction_static = 0.82
elif self.tp_size >= 2:
self.mem_fraction_static = 0.85
else: else:
self.mem_fraction_static = 0.9 self.mem_fraction_static = 0.90
@staticmethod @staticmethod
def add_cli_args(parser: argparse.ArgumentParser): def add_cli_args(parser: argparse.ArgumentParser):
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment