Commit 5b539788 authored by Guolin Ke
Browse files

fix some pep8 check

parent 1a8c23ed
...@@ -4,8 +4,6 @@ from __future__ import absolute_import ...@@ -4,8 +4,6 @@ from __future__ import absolute_import
import sys import sys
import os import os
import ctypes import ctypes
import collections
import re
import tempfile import tempfile
import numpy as np import numpy as np
...@@ -59,7 +57,7 @@ def is_1d_list(data): ...@@ -59,7 +57,7 @@ def is_1d_list(data):
if not isinstance(data, list): if not isinstance(data, list):
return False return False
if len(data) > 0: if len(data) > 0:
if not isinstance(data[0], (int, float, bool) ): if not isinstance(data[0], (int, float, bool)):
return False return False
return True return True
...@@ -108,29 +106,29 @@ def param_dict_to_str(data): ...@@ -108,29 +106,29 @@ def param_dict_to_str(data):
if is_str(val): if is_str(val):
pairs.append(str(key)+'='+str(val)) pairs.append(str(key)+'='+str(val))
elif isinstance(val, (list, tuple)): elif isinstance(val, (list, tuple)):
pairs.append(str(key)+'='+','.join(map(str,val))) pairs.append(str(key)+'='+','.join(map(str, val)))
elif isinstance(val, (int, float, bool)): elif isinstance(val, (int, float, bool)):
pairs.append(str(key)+'='+str(val)) pairs.append(str(key)+'='+str(val))
else: else:
raise TypeError('unknow type of parameter:%s , got:%s' %(key, type(val).__name__)) raise TypeError('unknow type of parameter:%s , got:%s'
% (key, type(val).__name__))
return ' '.join(pairs) return ' '.join(pairs)
"""marco definition of data type in c_api of LightGBM""" """marco definition of data type in c_api of LightGBM"""
C_API_DTYPE_FLOAT32 =0 C_API_DTYPE_FLOAT32 = 0
C_API_DTYPE_FLOAT64 =1 C_API_DTYPE_FLOAT64 = 1
C_API_DTYPE_INT32 =2 C_API_DTYPE_INT32 = 2
C_API_DTYPE_INT64 =3 C_API_DTYPE_INT64 = 3
"""Matric is row major in python""" """Matric is row major in python"""
C_API_IS_ROW_MAJOR =1 C_API_IS_ROW_MAJOR = 1
C_API_PREDICT_NORMAL =0 C_API_PREDICT_NORMAL = 0
C_API_PREDICT_RAW_SCORE =1 C_API_PREDICT_RAW_SCORE = 1
C_API_PREDICT_LEAF_INDEX =2 C_API_PREDICT_LEAF_INDEX = 2
FIELD_TYPE_MAPPER = {"label":C_API_DTYPE_FLOAT32, FIELD_TYPE_MAPPER = {"label": C_API_DTYPE_FLOAT32,
"weight":C_API_DTYPE_FLOAT32, "weight": C_API_DTYPE_FLOAT32,
"init_score":C_API_DTYPE_FLOAT32, "init_score": C_API_DTYPE_FLOAT32,
"group":C_API_DTYPE_INT32, "group": C_API_DTYPE_INT32}
}
def c_float_array(data): def c_float_array(data):
"""Convert numpy array / list to c float array.""" """Convert numpy array / list to c float array."""
...@@ -144,7 +142,8 @@ def c_float_array(data): ...@@ -144,7 +142,8 @@ def c_float_array(data):
ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_double)) ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
type_data = C_API_DTYPE_FLOAT64 type_data = C_API_DTYPE_FLOAT64
else: else:
raise TypeError("expected np.float32 or np.float64, met type({})".format(data.dtype)) raise TypeError("expected np.float32 or np.float64, met type({})"
.format(data.dtype))
else: else:
raise TypeError("Unknow type({})".format(type(data).__name__)) raise TypeError("Unknow type({})".format(type(data).__name__))
return (ptr_data, type_data) return (ptr_data, type_data)
...@@ -161,7 +160,8 @@ def c_int_array(data): ...@@ -161,7 +160,8 @@ def c_int_array(data):
ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_int64)) ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_int64))
type_data = C_API_DTYPE_INT64 type_data = C_API_DTYPE_INT64
else: else:
raise TypeError("expected np.int32 or np.int64, met type({})".format(data.dtype)) raise TypeError("expected np.int32 or np.int64, met type({})"
.format(data.dtype))
else: else:
raise TypeError("Unknow type({})".format(type(data).__name__)) raise TypeError("Unknow type({})".format(type(data).__name__))
return (ptr_data, type_data) return (ptr_data, type_data)
...@@ -169,7 +169,7 @@ def c_int_array(data): ...@@ -169,7 +169,7 @@ def c_int_array(data):
class Predictor(object): class Predictor(object):
""""A Predictor of LightGBM. """"A Predictor of LightGBM.
""" """
def __init__(self,model_file=None, booster_handle=None, is_manage_handle=True): def __init__(self, model_file=None, booster_handle=None, is_manage_handle=True):
"""Initialize the Predictor. """Initialize the Predictor.
Parameters Parameters
...@@ -213,7 +213,9 @@ class Predictor(object): ...@@ -213,7 +213,9 @@ class Predictor(object):
_safe_call(_LIB.LGBM_BoosterFree(self.handle)) _safe_call(_LIB.LGBM_BoosterFree(self.handle))
def predict(self, data, num_iteration=-1, raw_score=False, pred_leaf=False, data_has_header=False, is_reshape=True): def predict(self, data, num_iteration=-1,
raw_score=False, pred_leaf=False, data_has_header=False,
is_reshape=True):
""" """
Predict logic Predict logic
...@@ -222,7 +224,7 @@ class Predictor(object): ...@@ -222,7 +224,7 @@ class Predictor(object):
data : string/numpy array/scipy.sparse data : string/numpy array/scipy.sparse
Data source for prediction Data source for prediction
When data is string type, it represents the path of txt file, When data is string type, it represents the path of txt file,
num_iteration : num_iteration : int
used iteration for prediction used iteration for prediction
raw_score : bool raw_score : bool
True for predict raw score True for predict raw score
...@@ -238,7 +240,8 @@ class Predictor(object): ...@@ -238,7 +240,8 @@ class Predictor(object):
Prediction result Prediction result
""" """
if isinstance(data, Dataset): if isinstance(data, Dataset):
raise TypeError("cannot use Dataset instance for prediction, please use raw data instead") raise TypeError("cannot use Dataset instance for prediction, \
please use raw data instead")
predict_type = C_API_PREDICT_NORMAL predict_type = C_API_PREDICT_NORMAL
if raw_score: if raw_score:
predict_type = C_API_PREDICT_RAW_SCORE predict_type = C_API_PREDICT_RAW_SCORE
...@@ -256,7 +259,7 @@ class Predictor(object): ...@@ -256,7 +259,7 @@ class Predictor(object):
predict_type, predict_type,
num_iteration, num_iteration,
c_str(tmp_pred_fname))) c_str(tmp_pred_fname)))
tmp_file = open(tmp_pred_fname,"r") tmp_file = open(tmp_pred_fname, "r")
lines = tmp_file.readlines() lines = tmp_file.readlines()
tmp_file.close() tmp_file.close()
nrow = len(lines) nrow = len(lines)
...@@ -267,15 +270,19 @@ class Predictor(object): ...@@ -267,15 +270,19 @@ class Predictor(object):
preds = np.array(preds, copy=False) preds = np.array(preds, copy=False)
os.remove(tmp_pred_fname) os.remove(tmp_pred_fname)
elif isinstance(data, scipy.sparse.csr_matrix): elif isinstance(data, scipy.sparse.csr_matrix):
preds, nrow = self.__pred_for_csr(data, num_iteration, predict_type) preds, nrow = self.__pred_for_csr(data, num_iteration,
predict_type)
elif isinstance(data, np.ndarray): elif isinstance(data, np.ndarray):
preds, nrow = self.__pred_for_np2d(data, num_iteration, predict_type) preds, nrow = self.__pred_for_np2d(data, num_iteration,
predict_type)
else: else:
try: try:
csr = scipy.sparse.csr_matrix(data) csr = scipy.sparse.csr_matrix(data)
preds, nrow = self.__pred_for_csr(csr, num_iteration, predict_type) preds, nrow = self.__pred_for_csr(csr, num_iteration,
predict_type)
except: except:
raise TypeError('can not predict data for type {}'.format(type(data).__name__)) raise TypeError('can not predict data for type {}'.
format(type(data).__name__))
if pred_leaf: if pred_leaf:
preds = preds.astype(np.int32) preds = preds.astype(np.int32)
if preds.size != nrow and is_reshape: if preds.size != nrow and is_reshape:
...@@ -283,7 +290,8 @@ class Predictor(object): ...@@ -283,7 +290,8 @@ class Predictor(object):
ncol = int(preds.size / nrow) ncol = int(preds.size / nrow)
preds = preds.reshape(nrow, ncol) preds = preds.reshape(nrow, ncol)
else: else:
raise ValueError('len of predict result(%d) cannot be divide nrow(%d)' %(preds.size, nrow) ) raise ValueError('len of predict result(%d) cannot be divide nrow (%d)'
% (preds.size, nrow))
return preds return preds
def __get_num_preds(self, num_iteration, nrow, predict_type): def __get_num_preds(self, num_iteration, nrow, predict_type):
...@@ -308,7 +316,8 @@ class Predictor(object): ...@@ -308,7 +316,8 @@ class Predictor(object):
"""change non-float data to float data, need to copy""" """change non-float data to float data, need to copy"""
data = np.array(mat.reshape(mat.size), dtype=np.float32) data = np.array(mat.reshape(mat.size), dtype=np.float32)
ptr_data, type_ptr_data = c_float_array(data) ptr_data, type_ptr_data = c_float_array(data)
n_preds = self.__get_num_preds(num_iteration, mat.shape[0], predict_type) n_preds = self.__get_num_preds(num_iteration, mat.shape[0],
predict_type)
preds = np.zeros(n_preds, dtype=np.float32) preds = np.zeros(n_preds, dtype=np.float32)
out_num_preds = ctypes.c_int64(0) out_num_preds = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterPredictForMat( _safe_call(_LIB.LGBM_BoosterPredictForMat(
...@@ -365,10 +374,10 @@ except ImportError: ...@@ -365,10 +374,10 @@ except ImportError:
class DataFrame(object): class DataFrame(object):
pass pass
PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int', 'int64': 'int', PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int',
'uint8': 'int', 'uint16': 'int', 'uint32': 'int', 'uint64': 'int', 'int64': 'int', 'uint8': 'int', 'uint16': 'int',
'float16': 'float', 'float32': 'float', 'float64': 'float', 'uint32': 'int', 'uint64': 'int', 'float16': 'float',
'bool': 'i'} 'float32': 'float', 'float64': 'float', 'bool': 'i'}
def _data_from_pandas(data): def _data_from_pandas(data):
if isinstance(data, DataFrame): if isinstance(data, DataFrame):
...@@ -1098,7 +1107,7 @@ class Booster(object): ...@@ -1098,7 +1107,7 @@ class Booster(object):
data : string/numpy array/scipy.sparse data : string/numpy array/scipy.sparse
Data source for prediction Data source for prediction
When data is string type, it represents the path of txt file, When data is string type, it represents the path of txt file,
num_iteration : num_iteration : int
used iteration for prediction used iteration for prediction
raw_score : bool raw_score : bool
True for predict raw score True for predict raw score
...@@ -1181,7 +1190,7 @@ class Booster(object): ...@@ -1181,7 +1190,7 @@ class Booster(object):
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
data_ptr)) data_ptr))
if tmp_out_len.value != len(self.__inner_predict_buffer[data_idx]): if tmp_out_len.value != len(self.__inner_predict_buffer[data_idx]):
raise ValueError("incorrect number of predict results for data %d" %(data_idx) ) raise ValueError("incorrect number of predict results for data %d" % (data_idx) )
self.__is_predicted_cur_iter[data_idx] = True self.__is_predicted_cur_iter[data_idx] = True
return self.__inner_predict_buffer[data_idx] return self.__inner_predict_buffer[data_idx]
......
...@@ -148,7 +148,6 @@ def early_stop(stopping_rounds, verbose=True): ...@@ -148,7 +148,6 @@ def early_stop(stopping_rounds, verbose=True):
callback : function callback : function
The requested callback function. The requested callback function.
""" """
state = {}
factor_to_bigger_better = {} factor_to_bigger_better = {}
best_score = {} best_score = {}
best_iter = {} best_iter = {}
...@@ -172,7 +171,6 @@ def early_stop(stopping_rounds, verbose=True): ...@@ -172,7 +171,6 @@ def early_stop(stopping_rounds, verbose=True):
factor_to_bigger_better[i] = -1.0 factor_to_bigger_better[i] = -1.0
if env.evaluation_result_list[i][3]: if env.evaluation_result_list[i][3]:
factor_to_bigger_better[i] = 1.0 factor_to_bigger_better[i] = 1.0
state['best_iter'] = 0
def callback(env): def callback(env):
"""internal function""" """internal function"""
...@@ -188,7 +186,6 @@ def early_stop(stopping_rounds, verbose=True): ...@@ -188,7 +186,6 @@ def early_stop(stopping_rounds, verbose=True):
'\t'.join([_format_eval_result(x) for x in env.evaluation_result_list])) '\t'.join([_format_eval_result(x) for x in env.evaluation_result_list]))
else: else:
if env.iteration - best_iter[i] >= stopping_rounds: if env.iteration - best_iter[i] >= stopping_rounds:
state['best_iter'] = best_iter[i]
if env.model is not None: if env.model is not None:
env.model.set_attr(best_iteration=str(best_iter[i])) env.model.set_attr(best_iteration=str(best_iter[i]))
if verbose: if verbose:
......
"""Training Library containing training routines of LightGBM.""" """Training Library containing training routines of LightGBM."""
from __future__ import absolute_import from __future__ import absolute_import
import collections
import numpy as np import numpy as np
from .basic import LightGBMError, Predictor, Dataset, Booster, is_str from .basic import LightGBMError, Predictor, Dataset, Booster, is_str
from . import callback from . import callback
def _construct_dataset(X_y, reference=None, def _construct_dataset(X_y, reference=None,
params=None, other_fields=None, predictor=None): params=None, other_fields=None,
predictor=None):
if 'max_bin' in params: if 'max_bin' in params:
max_bin = int(params['max_bin']) max_bin = int(params['max_bin'])
else: else:
...@@ -31,10 +31,12 @@ def _construct_dataset(X_y, reference=None, ...@@ -31,10 +31,12 @@ def _construct_dataset(X_y, reference=None,
label = X_y[1] label = X_y[1]
if reference is None: if reference is None:
ret = Dataset(data, label=label, max_bin=max_bin, ret = Dataset(data, label=label, max_bin=max_bin,
weight=weight, group=group, predictor=predictor, params=params) weight=weight, group=group,
predictor=predictor, params=params)
else: else:
ret = reference.create_valid(data, label=label, weight=weight, group=group, params=params) ret = reference.create_valid(data, label=label, weight=weight,
group=group, params=params)
if init_score is not None: if init_score is not None:
ret.set_init_score(init_score) ret.set_init_score(init_score)
return ret return ret
...@@ -409,6 +411,6 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False, ...@@ -409,6 +411,6 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
evaluation_result_list=res)) evaluation_result_list=res))
except callback.EarlyStopException as e: except callback.EarlyStopException as e:
for k in results.keys(): for k in results.keys():
results[k] = results[k][:(e.state['best_iter'] + 1)] results[k] = results[k][:(e.best_iteration + 1)]
break break
return results return results
...@@ -194,7 +194,8 @@ class LGBMModel(LGBMModelBase): ...@@ -194,7 +194,8 @@ class LGBMModel(LGBMModelBase):
return params return params
def fit(self, X, y, eval_set=None, eval_metric=None, def fit(self, X, y, eval_set=None, eval_metric=None,
early_stopping_rounds=None, verbose=True, train_fields=None, valid_fields=None, other_params=None): early_stopping_rounds=None, verbose=True,
train_fields=None, valid_fields=None, other_params=None):
""" """
Fit the gradient boosting model Fit the gradient boosting model
...@@ -328,8 +329,10 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase): ...@@ -328,8 +329,10 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
if eval_set is not None: if eval_set is not None:
eval_set = list( (x[0], self._le.transform(x[1])) for x in eval_set ) eval_set = list( (x[0], self._le.transform(x[1])) for x in eval_set )
super(LGBMClassifier, self).fit(X, training_labels, eval_set, eval_metric, super(LGBMClassifier, self).fit(X, training_labels, eval_set,
early_stopping_rounds, verbose, train_fields, valid_fields, other_params) eval_metric, early_stopping_rounds,
verbose, train_fields, valid_fields,
other_params)
return self return self
def predict(self, data, raw_score=False, num_iteration=0): def predict(self, data, raw_score=False, num_iteration=0):
...@@ -429,5 +432,7 @@ class LGBMRanker(LGBMModel): ...@@ -429,5 +432,7 @@ class LGBMRanker(LGBMModel):
self.fobj = None self.fobj = None
super(LGBMRanker, self).fit(X, y, eval_set, eval_metric, super(LGBMRanker, self).fit(X, y, eval_set, eval_metric,
early_stopping_rounds, verbose, train_fields, valid_fields, other_params) early_stopping_rounds, verbose,
train_fields, valid_fields,
other_params)
return self return self
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment