# coding: utf-8
# pylint: disable = invalid-name, C0111
"""Train a LightGBM regressor and visualize the run with matplotlib.

Loads the bundled regression example data (tab-separated, label in column 0,
28 feature columns), trains for 100 boosting rounds while recording the
evaluation history, then renders three figures: the l1 metric curve, the top
feature importances, and the 84th tree (which splits on a categorical
feature).
"""
import lightgbm as lgb
import pandas as pd

try:
    import matplotlib.pyplot as plt
except ImportError:
    raise ImportError('You need to install matplotlib for plot_example.py.')

# load or create your dataset
print('Load data...')
df_train = pd.read_csv('../regression/regression.train', header=None, sep='\t')
df_test = pd.read_csv('../regression/regression.test', header=None, sep='\t')

# column 0 is the target; the remaining 28 columns are the features
y_train = df_train[0]
X_train = df_train.drop(0, axis=1)
y_test = df_test[0]
X_test = df_test.drop(0, axis=1)

# create dataset for lightgbm
lgb_train = lgb.Dataset(X_train, y_train)
lgb_test = lgb.Dataset(X_test, y_test, reference=lgb_train)

# specify your configurations as a dict
params = {
    'num_leaves': 5,
    'metric': ('l1', 'l2'),
    'verbose': 0
}

evals_result = {}  # to record eval results for plotting

print('Start training...')
# train
# NOTE: the `evals_result=` and `verbose_eval=` keyword arguments were
# removed in LightGBM 4.0; the callback equivalents below produce the same
# recorded history and the same every-10-rounds logging.
gbm = lgb.train(params,
                lgb_train,
                num_boost_round=100,
                valid_sets=[lgb_train, lgb_test],
                feature_name=['f' + str(i + 1) for i in range(28)],
                categorical_feature=[21],
                callbacks=[
                    lgb.record_evaluation(evals_result),  # was: evals_result=evals_result
                    lgb.log_evaluation(10),               # was: verbose_eval=10
                ])

print('Plot metrics during training...')
ax = lgb.plot_metric(evals_result, metric='l1')
plt.show()

print('Plot feature importances...')
ax = lgb.plot_importance(gbm, max_num_features=10)
plt.show()

print('Plot 84th tree...')
# one tree use categorical feature to split
ax = lgb.plot_tree(gbm, tree_index=83, figsize=(20, 8), show_info=['split_gain'])
plt.show()