# LightGBM training configuration for the binary classification example,
# set up for parallel training (tree_learner = feature).
# Format: one "key = value" per line; "#" starts a comment.

# task type; supported values: train, predict
task = train

# boosting type; only gbdt is supported for now
# alias: boosting, boost
boosting_type = gbdt

# application type; the following are supported:
#   regression , regression task
#   binary     , binary classification task
#   lambdarank , LambdaRank task
# alias: application, app
objective = binary

# evaluation metrics; multiple metrics may be given, delimited by ','
# supported metrics:
#   l1
#   l2             , default metric for regression
#   ndcg           , default metric for lambdarank
#   auc
#   binary_logloss , default metric for binary
#   binary_error
metric = binary_logloss,auc

# frequency of metric output
metric_freq = 1

# true to also output metrics for the training data
# alias: training_metric, train_metric
is_training_metric = true

# number of bins used to bucket feature values; 255 is the recommended
# setting: it saves memory and still gives good accuracy
max_bin = 255

# training data
# if a weight file exists, it must be named "binary.train.weight"
# alias: train_data, train
data = binary.train

# validation data; multiple validation files may be given, separated by ','
# if a weight file exists, it must be named "binary.test.weight"
# alias: valid, test, test_data
valid_data = binary.test

# number of trees (iterations)
# alias: num_tree, num_iteration, num_iterations, num_round, num_rounds
num_trees = 100

# shrinkage rate
# alias: shrinkage_rate
learning_rate = 0.1

# number of leaves for one tree
# alias: num_leaf
num_leaves = 63

# type of tree learner; the following are supported:
#   serial  , single machine version
#   feature , use feature parallel to train
#   data    , use data parallel to train
#   voting  , use voting based parallel to train
# alias: tree
tree_learner = feature

# number of threads for multi-threading; one thread is used per CPU.
# The default is the CPU count.
# num_threads = 8

# feature sub-sampling: randomly select 80% of the features to train on
# in each iteration
# alias: sub_feature
feature_fraction = 0.8

# bagging (data sub-sampling) frequency: perform bagging every 5 iterations
bagging_freq = 5

# bagging fraction: randomly select 80% of the data when bagging
# alias: sub_row
bagging_fraction = 0.8

# minimal number of data points in one leaf; use this to deal with over-fitting
# alias: min_data_per_leaf, min_data
min_data_in_leaf = 50

# minimal sum of Hessians in one leaf; use this to deal with over-fitting
min_sum_hessian_in_leaf = 5.0

# saves memory and speeds up training for sparse features
# alias: is_sparse
is_enable_sparse = true

# set to true when the data is bigger than memory; otherwise false is faster
# alias: two_round_loading, two_round
use_two_round_loading = false

# true to save the data to a binary file; the application will then
# automatically load the data from that binary file next time
# alias: is_save_binary, save_binary
is_save_binary_file = false

# output model file
output_model = LightGBM_model.txt

# continue training from an already trained gbdt model (uncomment to use)
# input_model = trained_model.txt

# output prediction file for the predict task (uncomment to use)
# output_result = prediction.txt

# number of machines in parallel training
# alias: num_machine
num_machines = 2

# local listening port in parallel training
# alias: local_port
local_listen_port = 12400

# machine list file for parallel training
# alias: mlist
machine_list_file = mlist.txt