; llama_config.ini
[ft_instance_hyperparameter]
data_type=fp16
enable_custom_all_reduce=0
pipeline_para_size=1
tensor_para_size=1
model_dir=/workspace/models/triton_models/weights/
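; Note (assumption, following FasterTransformer conventions): tensor_para_size *
; pipeline_para_size should match the number of GPUs serving the model, and
; model_dir should point at weights already converted for that parallel layout.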


[request]
request_batch_size=8
request_output_len=2048
beam_width=1 ; beam width for beam search
top_k=1 ; k value for top k sampling
top_p=0.0 ; p value for top p sampling
temperature=1.0 ; Used for sampling
repetition_penalty=1.00 ; Used for sampling
presence_penalty=0.0 ; Only one of repetition_penalty and presence_penalty is allowed.
len_penalty=0.0
beam_search_diversity_rate=0.0
; PJLM start/end ids
start_id=0
end_id=1
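; Note (assumption): with beam_width=1, top_k=1 and top_p=0.0 the request is
; effectively greedy decoding; raise top_k or top_p to get stochastic sampling.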


; --------------------- legacy params -------------------------

; LLaMA start/end ids
; start_id=1
; end_id=2
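
; Note (assumption, based on FasterTransformer-style LLaMA configs): in the model
; sections below, hidden size = head_num * size_per_head (e.g. 32 * 128 = 4096 for
; llama_7B), inter_size is the FFN intermediate dimension, and start_id/end_id are
; the tokenizer BOS/EOS token ids.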

[4999_llama]
head_num=80
size_per_head=128
vocab_size=65632
num_layer=82
rotary_embedding=128
norm_eps=1e-5
start_id=0
end_id=1
inter_size=27392

[llama_7B]
head_num=32
size_per_head=128
vocab_size=32000
num_layer=32
rotary_embedding=128
start_id=1
end_id=2
inter_size=11008

[llama_13B]
head_num=40
size_per_head=128
vocab_size=32000
num_layer=40
rotary_embedding=128
start_id=1
end_id=2
inter_size=13824

[llama_30B]
head_num=52
size_per_head=128
vocab_size=32000
num_layer=60
rotary_embedding=128
start_id=1
end_id=2
inter_size=17920

[llama_65B]
head_num=64
size_per_head=128
vocab_size=32000
num_layer=80
rotary_embedding=128
start_id=1
end_id=2
inter_size=22016