Unverified Commit a4bd6818 authored by Atream, committed by GitHub

Merge pull request #1241 from kvcache-ai/fix-cache-lens

fix-cache-lens
parents 8ba7e5d4 7adb7281
 import argparse
 from ktransformers.server.backend.args import ConfigArgs, default_args
 from ktransformers.util.utils import get_free_ports
+from transformers import AutoConfig

 class ArgumentParser:
     def __init__(self, cfg):
...
@@ -138,7 +139,11 @@ class ArgumentParser:
         self.cfg.server_port = args.port
         self.cfg.user_force_think = args.force_think
-        args.gpu_memory_size = 4*1024*1024*1024 # TODO: set this to the actual GPU memory size
+        model_config = AutoConfig.from_pretrained(args.model_dir, trust_remote_code=True)
+        if args.architectures == "Qwen3MoeForCausalLM" or args.architectures == "Qwen2MoeForCausalLM":
+            args.gpu_memory_size = args.cache_lens*2*2*model_config.num_hidden_layers*model_config.num_key_value_heads*model_config.head_dim
+        else:
+            args.gpu_memory_size = args.cache_lens*2*576*61
         self.cfg.gpu_memory_size = args.gpu_memory_size
         free_ports = get_free_ports(3, [args.port])
         args.sched_port = free_ports[0]
...
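
The hunk replaces the hard-coded 4 GiB budget with a per-model estimate of the KV-cache footprint. Below is a minimal standalone sketch of the same arithmetic, assuming fp16 cache entries (2 bytes each); the fallback constants 576 and 61 are read here as a compressed per-token KV width and a fixed layer count (they match DeepSeek-V3/R1's MLA cache), and the helper name estimate_kv_cache_bytes is hypothetical, not part of the PR.

# Standalone sketch of the sizing logic in the hunk above; the helper
# name is hypothetical and not part of the PR.
def estimate_kv_cache_bytes(cache_lens, architectures,
                            num_hidden_layers=0, num_key_value_heads=0,
                            head_dim=0):
    if architectures in ("Qwen3MoeForCausalLM", "Qwen2MoeForCausalLM"):
        # tokens * (K + V planes) * 2 bytes (fp16) * layers * KV heads * head dim
        return cache_lens * 2 * 2 * num_hidden_layers * num_key_value_heads * head_dim
    # Fallback: tokens * 2 bytes (fp16) * 576-wide compressed KV * 61 layers
    return cache_lens * 2 * 576 * 61

# e.g. a 32768-token cache on a 48-layer model with 4 KV heads of dim 128:
print(estimate_kv_cache_bytes(32768, "Qwen3MoeForCausalLM", 48, 4, 128))
# -> 3221225472 bytes (3 GiB), versus the old fixed 4 GiB

For that example configuration the estimate comes in under the old constant, so cache_lens now directly controls how much GPU memory the scheduler reserves instead of always claiming 4 GiB.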