train.conf 3.42 KB
Newer Older
Guolin Ke's avatar
Guolin Ke committed
1
2
3
4
5
6
7
8
9
# task type, support train and predict
task = train

# boosting type, support gbdt for now, alias: boosting, boost
boosting_type = gbdt

# application type, support following application
# regression , regression task
# binary , binary classification task
Andrew Ziem's avatar
Andrew Ziem committed
10
# lambdarank , LambdaRank task
Guolin Ke's avatar
Guolin Ke committed
11
12
13
# alias: application, app
objective = lambdarank

Andrew Ziem's avatar
Andrew Ziem committed
14
# eval metrics, support multi metric, delimited by ',' , support following metrics
15
# l1
Guolin Ke's avatar
Guolin Ke committed
16
17
# l2 , default metric for regression
# ndcg , default metric for lambdarank
18
# auc
Guolin Ke's avatar
Guolin Ke committed
19
20
21
22
23
24
25
# binary_logloss , default metric for binary
# binary_error
metric = ndcg

# evaluation position for ndcg metric, alias : ndcg_at
ndcg_eval_at = 1,3,5

Andrew Ziem's avatar
Andrew Ziem committed
26
# frequency for metric output
Guolin Ke's avatar
Guolin Ke committed
27
28
29
30
31
metric_freq = 1

# true to also output metrics for the training data, alias: training_metric, train_metric
is_training_metric = true

32
33
34
# column in data to use as label
label_column = 0

35
# number of bins for feature bucketing; 255 is the recommended setting, as it saves memory while keeping good accuracy.
Guolin Ke's avatar
Guolin Ke committed
36
37
38
max_bin = 255

# training data
Andrew Ziem's avatar
Andrew Ziem committed
39
40
# if a weight file exists, it should be named "rank.train.weight"
# if a query file exists, it should be named "rank.train.query"
Guolin Ke's avatar
Guolin Ke committed
41
42
43
44
# alias: train_data, train
data = rank.train

# validation data, support multi validation data, separated by ','
Andrew Ziem's avatar
Andrew Ziem committed
45
46
# if a weight file exists, it should be named "rank.test.weight"
# if a query file exists, it should be named "rank.test.query"
47
# alias: valid, test, test_data
Guolin Ke's avatar
Guolin Ke committed
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
valid_data = rank.test

# number of trees(iterations), alias: num_tree, num_iteration, num_iterations, num_round, num_rounds
num_trees = 100

# shrinkage rate , alias: shrinkage_rate
learning_rate = 0.1

# number of leaves for one tree, alias: num_leaf
num_leaves = 31

# type of tree learner, support following types:
# serial , single machine version
# feature , use feature parallel to train
# data , use data parallel to train
# voting , use voting based parallel to train
# alias: tree
tree_learner = serial

67
# number of threads for multi-threading. One thread will use one CPU; by default this is set to the number of CPUs.
Guolin Ke's avatar
Guolin Ke committed
68
69
# num_threads = 8

70
# feature sub-sample fraction: randomly selects this fraction of features on each iteration (e.g. 0.8 selects 80%)
Guolin Ke's avatar
Guolin Ke committed
71
72
73
74
75
76
# alias: sub_feature
feature_fraction = 1.0

# bagging (data sub-sample) frequency: k means perform bagging every k iterations, 0 disables bagging
bagging_freq = 1

Andrew Ziem's avatar
Andrew Ziem committed
77
# bagging fraction: randomly selects this fraction of the data on each bagging round (e.g. 0.8 selects 80%)
Guolin Ke's avatar
Guolin Ke committed
78
79
80
81
82
83
84
# alias: sub_row
bagging_fraction = 0.9

# minimal number of data points in one leaf, use this to deal with over-fitting
# alias : min_data_per_leaf, min_data
min_data_in_leaf = 50

Andrew Ziem's avatar
Andrew Ziem committed
85
# minimal sum of Hessians in one leaf, use this to deal with over-fitting
Guolin Ke's avatar
Guolin Ke committed
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
min_sum_hessian_in_leaf = 5.0

# saves memory and speeds up training for sparse features, alias: is_sparse
is_enable_sparse = true

# set this to true when the data is bigger than the available memory; otherwise, setting it to false is faster
# alias: two_round_loading, two_round
use_two_round_loading = false

# true to save the data to a binary file; the application will then automatically load the data from that binary file next time
# alias: is_save_binary, save_binary
is_save_binary_file = false

# output model file
output_model = LightGBM_model.txt

# supports continued training from a previously trained gbdt model
# input_model= trained_model.txt

# output prediction file for predict task
# output_result= prediction.txt


109
# number of machines in distributed training, alias: num_machine
Guolin Ke's avatar
Guolin Ke committed
110
111
num_machines = 1

112
# local listening port in distributed training, alias: local_port
Guolin Ke's avatar
Guolin Ke committed
113
114
local_listen_port = 12400

115
# machines list file for distributed training, alias: mlist
Guolin Ke's avatar
Guolin Ke committed
116
machine_list_file = mlist.txt