# config.yaml
---
# Logging settings: output directory, file name, verbosity and backup count.
log:
  dir: "logs"
  file: "lexllama.log"
  # Log level; one of: debug, info, warn, error, crit.
  level: "debug"
  # NOTE(review): -1 looks like a sentinel value (e.g. "no limit") — confirm
  # how the consumer interprets it.
  backup_count: -1

# Server network settings (address and port).
server:
  # Quoted so the value is always read as a string, whatever the parser.
  ip: "0.0.0.0"
  port: 10002

# Database settings.
db:
  # NOTE(review): spelled "sqllite" (not "sqlite"). The consumer may match on
  # this exact string — confirm before correcting the spelling.
  type: "sqllite"
  database: "server.db"
  # NOTE(review): "./" presumably is the directory holding the database file
  # rather than a network host — confirm.
  host: "./"
  pool_size: 10

# User/authentication settings.
user:
  # NOTE(review): a secret is hard-coded in this file. Anyone with access to
  # the file can read it; consider loading it from the environment or a
  # secret store and rotating this value.
  secret_key: "981f1dd2a44e27d68759d0252a486568ed43480b4e616a26e3af3709c3a7ce73"
  # NOTE(review): "HS256" presumably names the token-signing algorithm used
  # with secret_key — confirm against the consumer.
  algorithm: "HS256"

# Model settings: backend type, model identity/paths, device and size limits.
model:
  # Alternative values for `type`, kept commented out for reference:
  # type: transformers
  # type: balance_serve
  type: ktransformers

  # NOTE(review): `name` says DeepSeek-Coder-V2-Instruct while `path` and
  # `gguf_path` point at DeepSeek-V2-Lite-Chat — confirm this is intended.
  name: DeepSeek-Coder-V2-Instruct
  path: deepseek-ai/DeepSeek-V2-Lite-Chat
  gguf_path: ./DeepSeek-V2-Lite-Chat-GGUF

  device: cuda:0
  # NOTE(review): units for the two limits below are presumably tokens —
  # confirm against the consumer.
  cache_lens: 8192
  max_new_tokens: 500
# Web settings.
web:
  # Booleans are written in canonical lowercase form so every YAML parser
  # version reads them the same way.
  mount: false
  open_cross_domain: true

# Extension settings.
ext:
  # NOTE(review): presumably the number of CPU threads/cores used for
  # inference — confirm against the consumer.
  cpu_infer: 10

# Long-context settings.
long_context:
  max_seq_len: 32000
  block_size: 128
  local_windows_len: 4096
  second_select_num: 32
  anchor_type: DYNAMIC
  kv_type: FP16
  dense_layer_num: 2
  anchor_num: 1
  preselect_block: true
  head_select_mode: SHARED
  preselect_block_count: 32
  layer_step: 1
  # Was a bare `token_step:` (which parses as null); made explicit here.
  # NOTE(review): no value is given in this file — confirm whether null is
  # intended or a number is missing.
  token_step: null

# Local chat settings.
local_chat:
  # Empty string: no prompt file is configured here.
  prompt_file: ""

# Asynchronous server settings: scheduling strategy, ports and batch limit.
async_server:
  sched_strategy: 'FCFS'
  sched_port: 56441
  sched_metrics_port: 54321
  kvc2_metrics_port: 54391
  # Decode count + prefill count, in one mini batch.
  max_batch_size: 4

# Attention settings.
# NOTE(review): units for both sizes are presumably tokens — confirm.
attn:
  page_size: 256
  chunk_size: 256
# kvc2 settings.
kvc2:
  gpu_only: true
  # NOTE(review): named a percentage but set to 1.0 — presumably a 0-1
  # fraction; confirm against the consumer.
  utilization_percentage: 1.0
  cpu_memory_size_GB: 500