Commit 5b539788 authored by Guolin Ke's avatar Guolin Ke
Browse files

Fix some PEP 8 check failures

parent 1a8c23ed
......@@ -4,8 +4,6 @@ from __future__ import absolute_import
import sys
import os
import ctypes
import collections
import re
import tempfile
import numpy as np
......@@ -59,7 +57,7 @@ def is_1d_list(data):
if not isinstance(data, list):
return False
if len(data) > 0:
if not isinstance(data[0], (int, float, bool) ):
if not isinstance(data[0], (int, float, bool)):
return False
return True
......@@ -108,29 +106,29 @@ def param_dict_to_str(data):
if is_str(val):
pairs.append(str(key)+'='+str(val))
elif isinstance(val, (list, tuple)):
pairs.append(str(key)+'='+','.join(map(str,val)))
pairs.append(str(key)+'='+','.join(map(str, val)))
elif isinstance(val, (int, float, bool)):
pairs.append(str(key)+'='+str(val))
else:
raise TypeError('unknow type of parameter:%s , got:%s' %(key, type(val).__name__))
raise TypeError('unknow type of parameter:%s , got:%s'
% (key, type(val).__name__))
return ' '.join(pairs)
"""Macro definitions of the data types used in the C API of LightGBM"""
C_API_DTYPE_FLOAT32 =0
C_API_DTYPE_FLOAT64 =1
C_API_DTYPE_INT32 =2
C_API_DTYPE_INT64 =3
C_API_DTYPE_FLOAT32 = 0
C_API_DTYPE_FLOAT64 = 1
C_API_DTYPE_INT32 = 2
C_API_DTYPE_INT64 = 3
"""Matrix is row-major in Python"""
C_API_IS_ROW_MAJOR =1
C_API_IS_ROW_MAJOR = 1
C_API_PREDICT_NORMAL =0
C_API_PREDICT_RAW_SCORE =1
C_API_PREDICT_LEAF_INDEX =2
C_API_PREDICT_NORMAL = 0
C_API_PREDICT_RAW_SCORE = 1
C_API_PREDICT_LEAF_INDEX = 2
FIELD_TYPE_MAPPER = {"label":C_API_DTYPE_FLOAT32,
"weight":C_API_DTYPE_FLOAT32,
"init_score":C_API_DTYPE_FLOAT32,
"group":C_API_DTYPE_INT32,
}
FIELD_TYPE_MAPPER = {"label": C_API_DTYPE_FLOAT32,
"weight": C_API_DTYPE_FLOAT32,
"init_score": C_API_DTYPE_FLOAT32,
"group": C_API_DTYPE_INT32}
def c_float_array(data):
"""Convert numpy array / list to c float array."""
......@@ -144,7 +142,8 @@ def c_float_array(data):
ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
type_data = C_API_DTYPE_FLOAT64
else:
raise TypeError("expected np.float32 or np.float64, met type({})".format(data.dtype))
raise TypeError("expected np.float32 or np.float64, met type({})"
.format(data.dtype))
else:
raise TypeError("Unknow type({})".format(type(data).__name__))
return (ptr_data, type_data)
......@@ -161,7 +160,8 @@ def c_int_array(data):
ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_int64))
type_data = C_API_DTYPE_INT64
else:
raise TypeError("expected np.int32 or np.int64, met type({})".format(data.dtype))
raise TypeError("expected np.int32 or np.int64, met type({})"
.format(data.dtype))
else:
raise TypeError("Unknow type({})".format(type(data).__name__))
return (ptr_data, type_data)
......@@ -169,7 +169,7 @@ def c_int_array(data):
class Predictor(object):
"""A Predictor of LightGBM.
"""
def __init__(self,model_file=None, booster_handle=None, is_manage_handle=True):
def __init__(self, model_file=None, booster_handle=None, is_manage_handle=True):
"""Initialize the Predictor.
Parameters
......@@ -213,7 +213,9 @@ class Predictor(object):
_safe_call(_LIB.LGBM_BoosterFree(self.handle))
def predict(self, data, num_iteration=-1, raw_score=False, pred_leaf=False, data_has_header=False, is_reshape=True):
def predict(self, data, num_iteration=-1,
raw_score=False, pred_leaf=False, data_has_header=False,
is_reshape=True):
"""
Predict logic
......@@ -222,7 +224,7 @@ class Predictor(object):
data : string/numpy array/scipy.sparse
Data source for prediction
When data is a string, it represents the path of a text file.
num_iteration :
num_iteration : int
used iteration for prediction
raw_score : bool
True for predict raw score
......@@ -238,7 +240,8 @@ class Predictor(object):
Prediction result
"""
if isinstance(data, Dataset):
raise TypeError("cannot use Dataset instance for prediction, please use raw data instead")
raise TypeError("cannot use Dataset instance for prediction, \
please use raw data instead")
predict_type = C_API_PREDICT_NORMAL
if raw_score:
predict_type = C_API_PREDICT_RAW_SCORE
......@@ -256,7 +259,7 @@ class Predictor(object):
predict_type,
num_iteration,
c_str(tmp_pred_fname)))
tmp_file = open(tmp_pred_fname,"r")
tmp_file = open(tmp_pred_fname, "r")
lines = tmp_file.readlines()
tmp_file.close()
nrow = len(lines)
......@@ -267,15 +270,19 @@ class Predictor(object):
preds = np.array(preds, copy=False)
os.remove(tmp_pred_fname)
elif isinstance(data, scipy.sparse.csr_matrix):
preds, nrow = self.__pred_for_csr(data, num_iteration, predict_type)
preds, nrow = self.__pred_for_csr(data, num_iteration,
predict_type)
elif isinstance(data, np.ndarray):
preds, nrow = self.__pred_for_np2d(data, num_iteration, predict_type)
preds, nrow = self.__pred_for_np2d(data, num_iteration,
predict_type)
else:
try:
csr = scipy.sparse.csr_matrix(data)
preds, nrow = self.__pred_for_csr(csr, num_iteration, predict_type)
preds, nrow = self.__pred_for_csr(csr, num_iteration,
predict_type)
except:
raise TypeError('can not predict data for type {}'.format(type(data).__name__))
raise TypeError('can not predict data for type {}'.
format(type(data).__name__))
if pred_leaf:
preds = preds.astype(np.int32)
if preds.size != nrow and is_reshape:
......@@ -283,7 +290,8 @@ class Predictor(object):
ncol = int(preds.size / nrow)
preds = preds.reshape(nrow, ncol)
else:
raise ValueError('len of predict result(%d) cannot be divide nrow(%d)' %(preds.size, nrow) )
raise ValueError('len of predict result(%d) cannot be divide nrow (%d)'
% (preds.size, nrow))
return preds
def __get_num_preds(self, num_iteration, nrow, predict_type):
......@@ -308,7 +316,8 @@ class Predictor(object):
"""change non-float data to float data, need to copy"""
data = np.array(mat.reshape(mat.size), dtype=np.float32)
ptr_data, type_ptr_data = c_float_array(data)
n_preds = self.__get_num_preds(num_iteration, mat.shape[0], predict_type)
n_preds = self.__get_num_preds(num_iteration, mat.shape[0],
predict_type)
preds = np.zeros(n_preds, dtype=np.float32)
out_num_preds = ctypes.c_int64(0)
_safe_call(_LIB.LGBM_BoosterPredictForMat(
......@@ -365,10 +374,10 @@ except ImportError:
class DataFrame(object):
pass
PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int', 'int64': 'int',
'uint8': 'int', 'uint16': 'int', 'uint32': 'int', 'uint64': 'int',
'float16': 'float', 'float32': 'float', 'float64': 'float',
'bool': 'i'}
PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int',
'int64': 'int', 'uint8': 'int', 'uint16': 'int',
'uint32': 'int', 'uint64': 'int', 'float16': 'float',
'float32': 'float', 'float64': 'float', 'bool': 'i'}
def _data_from_pandas(data):
if isinstance(data, DataFrame):
......@@ -1098,7 +1107,7 @@ class Booster(object):
data : string/numpy array/scipy.sparse
Data source for prediction
When data is a string, it represents the path of a text file.
num_iteration :
num_iteration : int
used iteration for prediction
raw_score : bool
True for predict raw score
......@@ -1181,7 +1190,7 @@ class Booster(object):
ctypes.byref(tmp_out_len),
data_ptr))
if tmp_out_len.value != len(self.__inner_predict_buffer[data_idx]):
raise ValueError("incorrect number of predict results for data %d" %(data_idx) )
raise ValueError("incorrect number of predict results for data %d" % (data_idx) )
self.__is_predicted_cur_iter[data_idx] = True
return self.__inner_predict_buffer[data_idx]
......
......@@ -148,7 +148,6 @@ def early_stop(stopping_rounds, verbose=True):
callback : function
The requested callback function.
"""
state = {}
factor_to_bigger_better = {}
best_score = {}
best_iter = {}
......@@ -172,7 +171,6 @@ def early_stop(stopping_rounds, verbose=True):
factor_to_bigger_better[i] = -1.0
if env.evaluation_result_list[i][3]:
factor_to_bigger_better[i] = 1.0
state['best_iter'] = 0
def callback(env):
"""internal function"""
......@@ -188,7 +186,6 @@ def early_stop(stopping_rounds, verbose=True):
'\t'.join([_format_eval_result(x) for x in env.evaluation_result_list]))
else:
if env.iteration - best_iter[i] >= stopping_rounds:
state['best_iter'] = best_iter[i]
if env.model is not None:
env.model.set_attr(best_iteration=str(best_iter[i]))
if verbose:
......
"""Training Library containing training routines of LightGBM."""
from __future__ import absolute_import
import collections
import numpy as np
from .basic import LightGBMError, Predictor, Dataset, Booster, is_str
from . import callback
def _construct_dataset(X_y, reference=None,
params=None, other_fields=None, predictor=None):
params=None, other_fields=None,
predictor=None):
if 'max_bin' in params:
max_bin = int(params['max_bin'])
else:
......@@ -31,10 +31,12 @@ def _construct_dataset(X_y, reference=None,
label = X_y[1]
if reference is None:
ret = Dataset(data, label=label, max_bin=max_bin,
weight=weight, group=group, predictor=predictor, params=params)
weight=weight, group=group,
predictor=predictor, params=params)
else:
ret = reference.create_valid(data, label=label, weight=weight, group=group, params=params)
ret = reference.create_valid(data, label=label, weight=weight,
group=group, params=params)
if init_score is not None:
ret.set_init_score(init_score)
return ret
......@@ -409,6 +411,6 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
evaluation_result_list=res))
except callback.EarlyStopException as e:
for k in results.keys():
results[k] = results[k][:(e.state['best_iter'] + 1)]
results[k] = results[k][:(e.best_iteration + 1)]
break
return results
......@@ -194,7 +194,8 @@ class LGBMModel(LGBMModelBase):
return params
def fit(self, X, y, eval_set=None, eval_metric=None,
early_stopping_rounds=None, verbose=True, train_fields=None, valid_fields=None, other_params=None):
early_stopping_rounds=None, verbose=True,
train_fields=None, valid_fields=None, other_params=None):
"""
Fit the gradient boosting model
......@@ -328,8 +329,10 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
if eval_set is not None:
eval_set = list( (x[0], self._le.transform(x[1])) for x in eval_set )
super(LGBMClassifier, self).fit(X, training_labels, eval_set, eval_metric,
early_stopping_rounds, verbose, train_fields, valid_fields, other_params)
super(LGBMClassifier, self).fit(X, training_labels, eval_set,
eval_metric, early_stopping_rounds,
verbose, train_fields, valid_fields,
other_params)
return self
def predict(self, data, raw_score=False, num_iteration=0):
......@@ -429,5 +432,7 @@ class LGBMRanker(LGBMModel):
self.fobj = None
super(LGBMRanker, self).fit(X, y, eval_set, eval_metric,
early_stopping_rounds, verbose, train_fields, valid_fields, other_params)
early_stopping_rounds, verbose,
train_fields, valid_fields,
other_params)
return self
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment