Commit 76c44d78 authored by wxchan, committed by Guolin Ke
Browse files

refine compatibility (#186)

* add compat.py

* unify types

* range/xrange -> range_

* move argc_ to compat.py; add is_numeric error type

* use simplejson for json

* move json to compat.py

* move pandas to compat.py

* move sklearn to compat.py

* remove unused function

* fix 'unify types'

* argc_ (lambda -> def)
parent aa333925
...@@ -5,28 +5,16 @@ ...@@ -5,28 +5,16 @@
from __future__ import absolute_import from __future__ import absolute_import
import ctypes import ctypes
import json
import os import os
import sys
from tempfile import NamedTemporaryFile from tempfile import NamedTemporaryFile
import numpy as np import numpy as np
import scipy.sparse import scipy.sparse
from .compat import (DataFrame, Series, integer_types, json, numeric_types,
range_, string_type)
from .libpath import find_lib_path from .libpath import find_lib_path
"""pandas"""
try:
from pandas import Series, DataFrame
except ImportError:
class Series(object):
pass
class DataFrame(object):
pass
IS_PY3 = (sys.version_info[0] == 3)
def _load_lib(): def _load_lib():
"""Load LightGBM Library.""" """Load LightGBM Library."""
...@@ -57,28 +45,17 @@ def _safe_call(ret): ...@@ -57,28 +45,17 @@ def _safe_call(ret):
raise LightGBMError(_LIB.LGBM_GetLastError()) raise LightGBMError(_LIB.LGBM_GetLastError())
def is_str(s):
"""Check is a str or not"""
if IS_PY3:
return isinstance(s, str)
else:
return isinstance(s, basestring)
def is_numeric(obj): def is_numeric(obj):
"""Check is a number or not, include numpy number etc.""" """Check is a number or not, include numpy number etc."""
try: try:
float(obj) float(obj)
return True return True
except: except (TypeError, ValueError):
# TypeError: obj is not a string or a number
# ValueError: invalid literal
return False return False
def is_numpy_object(data):
"""Check is numpy object"""
return type(data).__module__ == np.__name__
def is_numpy_1d_array(data): def is_numpy_1d_array(data):
"""Check is 1d numpy array""" """Check is 1d numpy array"""
return isinstance(data, np.ndarray) and len(data.shape) == 1 return isinstance(data, np.ndarray) and len(data.shape) == 1
...@@ -87,7 +64,7 @@ def is_numpy_1d_array(data): ...@@ -87,7 +64,7 @@ def is_numpy_1d_array(data):
def is_1d_list(data): def is_1d_list(data):
"""Check is 1d list""" """Check is 1d list"""
return isinstance(data, list) and \ return isinstance(data, list) and \
(not data or isinstance(data[0], (int, float, bool))) (not data or isinstance(data[0], numeric_types))
def list_to_1d_numpy(data, dtype=np.float32, name='list'): def list_to_1d_numpy(data, dtype=np.float32, name='list'):
...@@ -140,7 +117,7 @@ def param_dict_to_str(data): ...@@ -140,7 +117,7 @@ def param_dict_to_str(data):
for key, val in data.items(): for key, val in data.items():
if isinstance(val, (list, tuple, set)) or is_numpy_1d_array(val): if isinstance(val, (list, tuple, set)) or is_numpy_1d_array(val):
pairs.append(str(key) + '=' + ','.join(map(str, val))) pairs.append(str(key) + '=' + ','.join(map(str, val)))
elif is_str(val) or isinstance(val, (int, float, bool)) or is_numeric(val): elif isinstance(val, string_type) or isinstance(val, numeric_types) or is_numeric(val):
pairs.append(str(key) + '=' + str(val)) pairs.append(str(key) + '=' + str(val))
else: else:
raise TypeError('Unknown type of parameter:%s, got:%s' raise TypeError('Unknown type of parameter:%s, got:%s'
...@@ -314,7 +291,7 @@ class _InnerPredictor(object): ...@@ -314,7 +291,7 @@ class _InnerPredictor(object):
int_data_has_header = 1 if data_has_header else 0 int_data_has_header = 1 if data_has_header else 0
if num_iteration > self.num_total_iteration: if num_iteration > self.num_total_iteration:
num_iteration = self.num_total_iteration num_iteration = self.num_total_iteration
if is_str(data): if isinstance(data, string_type):
with _temp_file() as f: with _temp_file() as f:
_safe_call(_LIB.LGBM_BoosterPredictForFile( _safe_call(_LIB.LGBM_BoosterPredictForFile(
self.handle, self.handle,
...@@ -576,9 +553,9 @@ class Dataset(object): ...@@ -576,9 +553,9 @@ class Dataset(object):
if feature_name is not None: if feature_name is not None:
feature_dict = {name: i for i, name in enumerate(feature_name)} feature_dict = {name: i for i, name in enumerate(feature_name)}
for name in categorical_feature: for name in categorical_feature:
if is_str(name) and name in feature_dict: if isinstance(name, string_type) and name in feature_dict:
categorical_indices.add(feature_dict[name]) categorical_indices.add(feature_dict[name])
elif isinstance(name, int): elif isinstance(name, integer_types):
categorical_indices.add(name) categorical_indices.add(name)
else: else:
raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature" raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature"
...@@ -594,7 +571,7 @@ class Dataset(object): ...@@ -594,7 +571,7 @@ class Dataset(object):
elif reference is not None: elif reference is not None:
raise TypeError('Reference dataset should be None or dataset instance') raise TypeError('Reference dataset should be None or dataset instance')
"""start construct data""" """start construct data"""
if is_str(data): if isinstance(data, string_type):
"""check data has header or not""" """check data has header or not"""
if str(params.get("has_header", "")).lower() == "true" \ if str(params.get("has_header", "")).lower() == "true" \
or str(params.get("header", "")).lower() == "true": or str(params.get("header", "")).lower() == "true":
...@@ -635,8 +612,8 @@ class Dataset(object): ...@@ -635,8 +612,8 @@ class Dataset(object):
# need re group init score # need re group init score
new_init_score = np.zeros(init_score.size, dtype=np.float32) new_init_score = np.zeros(init_score.size, dtype=np.float32)
num_data = self.num_data() num_data = self.num_data()
for i in range(num_data): for i in range_(num_data):
for j in range(self.predictor.num_class): for j in range_(self.predictor.num_class):
new_init_score[j * num_data + i] = init_score[i * self.predictor.num_class + j] new_init_score[j * num_data + i] = init_score[i * self.predictor.num_class + j]
init_score = new_init_score init_score = new_init_score
init_score = init_score.astype(dtype=np.float32, copy=False) init_score = init_score.astype(dtype=np.float32, copy=False)
...@@ -1065,7 +1042,7 @@ class Dataset(object): ...@@ -1065,7 +1042,7 @@ class Dataset(object):
if self.group is not None: if self.group is not None:
# group data from LightGBM is boundaries data, need to convert to group size # group data from LightGBM is boundaries data, need to convert to group size
new_group = [] new_group = []
for i in range(len(self.group) - 1): for i in range_(len(self.group) - 1):
new_group.append(self.group[i + 1] - self.group[i]) new_group.append(self.group[i + 1] - self.group[i])
self.group = new_group self.group = new_group
return self.group return self.group
...@@ -1292,7 +1269,7 @@ class Booster(object): ...@@ -1292,7 +1269,7 @@ class Booster(object):
_safe_call(_LIB.LGBM_BoosterUpdateOneIter( _safe_call(_LIB.LGBM_BoosterUpdateOneIter(
self.handle, self.handle,
ctypes.byref(is_finished))) ctypes.byref(is_finished)))
self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)] self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)]
return is_finished.value == 1 return is_finished.value == 1
else: else:
grad, hess = fobj(self.__inner_predict(0), self.train_set) grad, hess = fobj(self.__inner_predict(0), self.train_set)
...@@ -1326,7 +1303,7 @@ class Booster(object): ...@@ -1326,7 +1303,7 @@ class Booster(object):
grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), grad.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
hess.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), hess.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
ctypes.byref(is_finished))) ctypes.byref(is_finished)))
self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)] self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)]
return is_finished.value == 1 return is_finished.value == 1
def rollback_one_iter(self): def rollback_one_iter(self):
...@@ -1335,7 +1312,7 @@ class Booster(object): ...@@ -1335,7 +1312,7 @@ class Booster(object):
""" """
_safe_call(_LIB.LGBM_BoosterRollbackOneIter( _safe_call(_LIB.LGBM_BoosterRollbackOneIter(
self.handle)) self.handle))
self.__is_predicted_cur_iter = [False for _ in range(self.__num_dataset)] self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)]
def current_iteration(self): def current_iteration(self):
out_cur_iter = ctypes.c_int(0) out_cur_iter = ctypes.c_int(0)
...@@ -1366,7 +1343,7 @@ class Booster(object): ...@@ -1366,7 +1343,7 @@ class Booster(object):
if data is self.train_set: if data is self.train_set:
data_idx = 0 data_idx = 0
else: else:
for i in range(len(self.valid_sets)): for i in range_(len(self.valid_sets)):
if data is self.valid_sets[i]: if data is self.valid_sets[i]:
data_idx = i + 1 data_idx = i + 1
break break
...@@ -1407,7 +1384,7 @@ class Booster(object): ...@@ -1407,7 +1384,7 @@ class Booster(object):
result: str result: str
Evaluation result list. Evaluation result list.
""" """
return [item for i in range(1, self.__num_dataset) return [item for i in range_(1, self.__num_dataset)
for item in self.__inner_eval(self.name_valid_sets[i - 1], i, feval)] for item in self.__inner_eval(self.name_valid_sets[i - 1], i, feval)]
def save_model(self, filename, num_iteration=-1): def save_model(self, filename, num_iteration=-1):
...@@ -1535,7 +1512,7 @@ class Booster(object): ...@@ -1535,7 +1512,7 @@ class Booster(object):
self.__get_eval_info() self.__get_eval_info()
ret = [] ret = []
if self.__num_inner_eval > 0: if self.__num_inner_eval > 0:
result = np.array([0.0 for _ in range(self.__num_inner_eval)], dtype=np.float64) result = np.array([0.0 for _ in range_(self.__num_inner_eval)], dtype=np.float64)
tmp_out_len = ctypes.c_int(0) tmp_out_len = ctypes.c_int(0)
_safe_call(_LIB.LGBM_BoosterGetEval( _safe_call(_LIB.LGBM_BoosterGetEval(
self.handle, self.handle,
...@@ -1544,7 +1521,7 @@ class Booster(object): ...@@ -1544,7 +1521,7 @@ class Booster(object):
result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))) result.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
if tmp_out_len.value != self.__num_inner_eval: if tmp_out_len.value != self.__num_inner_eval:
raise ValueError("Wrong length of eval results") raise ValueError("Wrong length of eval results")
for i in range(self.__num_inner_eval): for i in range_(self.__num_inner_eval):
ret.append((data_name, self.__name_inner_eval[i], result[i], self.__higher_better_inner_eval[i])) ret.append((data_name, self.__name_inner_eval[i], result[i], self.__higher_better_inner_eval[i]))
if feval is not None: if feval is not None:
if data_idx == 0: if data_idx == 0:
...@@ -1572,7 +1549,7 @@ class Booster(object): ...@@ -1572,7 +1549,7 @@ class Booster(object):
else: else:
n_preds = self.valid_sets[data_idx - 1].num_data() * self.__num_class n_preds = self.valid_sets[data_idx - 1].num_data() * self.__num_class
self.__inner_predict_buffer[data_idx] = \ self.__inner_predict_buffer[data_idx] = \
np.array([0.0 for _ in range(n_preds)], dtype=np.float64, copy=False) np.array([0.0 for _ in range_(n_preds)], dtype=np.float64, copy=False)
"""avoid to predict many time in one iteration""" """avoid to predict many time in one iteration"""
if not self.__is_predicted_cur_iter[data_idx]: if not self.__is_predicted_cur_iter[data_idx]:
tmp_out_len = ctypes.c_int64(0) tmp_out_len = ctypes.c_int64(0)
...@@ -1602,7 +1579,7 @@ class Booster(object): ...@@ -1602,7 +1579,7 @@ class Booster(object):
if self.__num_inner_eval > 0: if self.__num_inner_eval > 0:
"""Get name of evals""" """Get name of evals"""
tmp_out_len = ctypes.c_int(0) tmp_out_len = ctypes.c_int(0)
string_buffers = [ctypes.create_string_buffer(255) for i in range(self.__num_inner_eval)] string_buffers = [ctypes.create_string_buffer(255) for i in range_(self.__num_inner_eval)]
ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers)) ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_BoosterGetEvalNames( _safe_call(_LIB.LGBM_BoosterGetEvalNames(
self.handle, self.handle,
...@@ -1611,7 +1588,7 @@ class Booster(object): ...@@ -1611,7 +1588,7 @@ class Booster(object):
if self.__num_inner_eval != tmp_out_len.value: if self.__num_inner_eval != tmp_out_len.value:
raise ValueError("Length of eval names doesn't equal with num_evals") raise ValueError("Length of eval names doesn't equal with num_evals")
self.__name_inner_eval = \ self.__name_inner_eval = \
[string_buffers[i].value.decode() for i in range(self.__num_inner_eval)] [string_buffers[i].value.decode() for i in range_(self.__num_inner_eval)]
self.__higher_better_inner_eval = \ self.__higher_better_inner_eval = \
[name.startswith(('auc', 'ndcg')) for name in self.__name_inner_eval] [name.startswith(('auc', 'ndcg')) for name in self.__name_inner_eval]
...@@ -1642,7 +1619,7 @@ class Booster(object): ...@@ -1642,7 +1619,7 @@ class Booster(object):
""" """
for key, value in kwargs.items(): for key, value in kwargs.items():
if value is not None: if value is not None:
if not is_str(value): if not isinstance(value, string_type):
raise ValueError("Set attr only accepts strings") raise ValueError("Set attr only accepts strings")
self.__attr[key] = value self.__attr[key] = value
else: else:
......
...@@ -4,6 +4,8 @@ from __future__ import absolute_import ...@@ -4,6 +4,8 @@ from __future__ import absolute_import
import collections import collections
from .compat import range_
class EarlyStopException(Exception): class EarlyStopException(Exception):
"""Exception of early stopping. """Exception of early stopping.
...@@ -171,7 +173,7 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -171,7 +173,7 @@ def early_stopping(stopping_rounds, verbose=True):
msg = "Train until valid scores didn't improve in {} rounds." msg = "Train until valid scores didn't improve in {} rounds."
print(msg.format(stopping_rounds)) print(msg.format(stopping_rounds))
for i in range(len(env.evaluation_result_list)): for i in range_(len(env.evaluation_result_list)):
best_score[i] = float('-inf') best_score[i] = float('-inf')
best_iter[i] = 0 best_iter[i] = 0
if verbose: if verbose:
...@@ -182,7 +184,7 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -182,7 +184,7 @@ def early_stopping(stopping_rounds, verbose=True):
"""internal function""" """internal function"""
if not best_score: if not best_score:
init(env) init(env)
for i in range(len(env.evaluation_result_list)): for i in range_(len(env.evaluation_result_list)):
score = env.evaluation_result_list[i][2] * factor_to_bigger_better[i] score = env.evaluation_result_list[i][2] * factor_to_bigger_better[i]
if score > best_score[i]: if score > best_score[i]:
best_score[i] = score best_score[i] = score
......
# coding: utf-8
# pylint: disable = C0103
"""Compatibility"""
from __future__ import absolute_import
import inspect
import sys
is_py3 = (sys.version_info[0] == 3)

# Compatibility shims between Python 2 and Python 3.
# Each branch binds the same set of names so callers can use them
# without checking the interpreter version themselves.
if is_py3:
    string_type = str
    numeric_types = (int, float, bool)
    integer_types = int
    range_ = range

    def argc_(func):
        """Return the number of parameters in the signature of ``func``."""
        return len(inspect.signature(func).parameters)
else:
    # ``basestring``, ``long`` and ``xrange`` are Python 2 builtins; this
    # branch is only executed when ``is_py3`` is False.
    string_type = basestring
    numeric_types = (int, long, float, bool)
    integer_types = (int, long)
    range_ = xrange

    def argc_(func):
        """Return the number of named arguments reported by ``getargspec`` for ``func``."""
        return len(inspect.getargspec(func).args)
# json: prefer simplejson when it can be imported, otherwise use the
# standard-library json module under the same name.
try:
    import simplejson as json
except (ImportError, SyntaxError):
    # simplejson does not support Python 3.2, it throws a SyntaxError
    # because of u'...' Unicode literals.
    import json
# pandas: optional dependency. When it is missing, empty placeholder
# classes are bound to the same names so that ``isinstance`` checks
# against Series / DataFrame still work and simply evaluate to False.
try:
    from pandas import Series, DataFrame
except ImportError:
    class Series(object):
        """Placeholder used when pandas is not installed."""
        pass

    class DataFrame(object):
        """Placeholder used when pandas is not installed."""
        pass
# sklearn: optional dependency. The LGBM* aliases let the rest of the
# package refer to scikit-learn pieces without importing sklearn directly.
try:
    from sklearn.base import BaseEstimator
    from sklearn.base import RegressorMixin, ClassifierMixin
    from sklearn.preprocessing import LabelEncoder
    from sklearn.utils import deprecated
    try:
        from sklearn.model_selection import StratifiedKFold
    except ImportError:
        # Fall back to the older module location when model_selection
        # cannot be imported.
        from sklearn.cross_validation import StratifiedKFold
    SKLEARN_INSTALLED = True
    LGBMModelBase = BaseEstimator
    LGBMRegressorBase = RegressorMixin
    LGBMClassifierBase = ClassifierMixin
    LGBMLabelEncoder = LabelEncoder
    LGBMDeprecated = deprecated
    LGBMStratifiedKFold = StratifiedKFold
except ImportError:
    SKLEARN_INSTALLED = False
    LGBMModelBase = object
    LGBMClassifierBase = object
    LGBMRegressorBase = object
    LGBMLabelEncoder = None
    LGBMStratifiedKFold = None

    def LGBMDeprecated(extra=''):
        """No-op stand-in for ``sklearn.utils.deprecated``.

        The wrapper classes apply ``@LGBMDeprecated('...')`` while the class
        body is being executed, so this name must be callable even without
        scikit-learn; binding it to ``None`` would raise ``TypeError`` on
        import. The returned decorator hands the decorated object back
        unchanged and ignores ``extra``.
        """
        def decorator(obj):
            return obj
        return decorator
...@@ -9,7 +9,9 @@ from operator import attrgetter ...@@ -9,7 +9,9 @@ from operator import attrgetter
import numpy as np import numpy as np
from . import callback from . import callback
from .basic import Booster, Dataset, LightGBMError, _InnerPredictor, is_str from .basic import Booster, Dataset, LightGBMError, _InnerPredictor
from .compat import (SKLEARN_INSTALLED, LGBMStratifiedKFold, integer_types,
range_, string_type)
def train(params, train_set, num_boost_round=100, def train(params, train_set, num_boost_round=100,
...@@ -85,7 +87,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -85,7 +87,7 @@ def train(params, train_set, num_boost_round=100,
booster : a trained booster model booster : a trained booster model
""" """
"""create predictor first""" """create predictor first"""
if is_str(init_model): if isinstance(init_model, string_type):
predictor = _InnerPredictor(model_file=init_model) predictor = _InnerPredictor(model_file=init_model)
elif isinstance(init_model, Booster): elif isinstance(init_model, Booster):
predictor = init_model._to_predictor() predictor = init_model._to_predictor()
...@@ -108,7 +110,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -108,7 +110,7 @@ def train(params, train_set, num_boost_round=100,
if valid_sets is not None: if valid_sets is not None:
if isinstance(valid_sets, Dataset): if isinstance(valid_sets, Dataset):
valid_sets = [valid_sets] valid_sets = [valid_sets]
if isinstance(valid_names, str): if isinstance(valid_names, string_type):
valid_names = [valid_names] valid_names = [valid_names]
for i, valid_data in enumerate(valid_sets): for i, valid_data in enumerate(valid_sets):
"""reduce cost for prediction training data""" """reduce cost for prediction training data"""
...@@ -138,7 +140,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -138,7 +140,7 @@ def train(params, train_set, num_boost_round=100,
# Most of legacy advanced options becomes callbacks # Most of legacy advanced options becomes callbacks
if verbose_eval is True: if verbose_eval is True:
callbacks.add(callback.print_evaluation()) callbacks.add(callback.print_evaluation())
elif isinstance(verbose_eval, int): elif isinstance(verbose_eval, integer_types):
callbacks.add(callback.print_evaluation(verbose_eval)) callbacks.add(callback.print_evaluation(verbose_eval))
if early_stopping_rounds is not None: if early_stopping_rounds is not None:
...@@ -163,7 +165,7 @@ def train(params, train_set, num_boost_round=100, ...@@ -163,7 +165,7 @@ def train(params, train_set, num_boost_round=100,
booster.add_valid(valid_set, name_valid_set) booster.add_valid(valid_set, name_valid_set)
"""start training""" """start training"""
for i in range(init_iteration, init_iteration + num_boost_round): for i in range_(init_iteration, init_iteration + num_boost_round):
for cb in callbacks_before_iter: for cb in callbacks_before_iter:
cb(callback.CallbackEnv(model=booster, cb(callback.CallbackEnv(model=booster,
params=params, params=params,
...@@ -217,25 +219,14 @@ class CVBooster(object): ...@@ -217,25 +219,14 @@ class CVBooster(object):
return handlerFunction return handlerFunction
try:
from sklearn.model_selection import StratifiedKFold
SKLEARN_StratifiedKFold = True
except ImportError:
try:
from sklearn.cross_validation import StratifiedKFold
SKLEARN_StratifiedKFold = True
except ImportError:
SKLEARN_StratifiedKFold = False
def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False, shuffle=True): def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False, shuffle=True):
""" """
Make an n-fold list of Booster from random indices. Make an n-fold list of Booster from random indices.
""" """
np.random.seed(seed) np.random.seed(seed)
if stratified: if stratified:
if SKLEARN_StratifiedKFold: if SKLEARN_INSTALLED:
sfk = StratifiedKFold(n_splits=nfold, shuffle=shuffle, random_state=seed) sfk = LGBMStratifiedKFold(n_splits=nfold, shuffle=shuffle, random_state=seed)
idset = [x[1] for x in sfk.split(X=full_data.get_label(), y=full_data.get_label())] idset = [x[1] for x in sfk.split(X=full_data.get_label(), y=full_data.get_label())]
else: else:
raise LightGBMError('Scikit-learn is required for stratified cv') raise LightGBMError('Scikit-learn is required for stratified cv')
...@@ -244,11 +235,11 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals ...@@ -244,11 +235,11 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals
if shuffle: if shuffle:
randidx = np.random.permutation(full_data.num_data()) randidx = np.random.permutation(full_data.num_data())
kstep = int(len(randidx) / nfold) kstep = int(len(randidx) / nfold)
idset = [randidx[(i * kstep): min(len(randidx), (i + 1) * kstep)] for i in range(nfold)] idset = [randidx[(i * kstep): min(len(randidx), (i + 1) * kstep)] for i in range_(nfold)]
ret = CVBooster() ret = CVBooster()
for k in range(nfold): for k in range_(nfold):
train_set = full_data.subset(np.concatenate([idset[i] for i in range(nfold) if k != i])) train_set = full_data.subset(np.concatenate([idset[i] for i in range_(nfold) if k != i]))
valid_set = full_data.subset(idset[k]) valid_set = full_data.subset(idset[k])
# run preprocessing on the data set if needed # run preprocessing on the data set if needed
if fpreproc is not None: if fpreproc is not None:
...@@ -341,7 +332,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False, ...@@ -341,7 +332,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
if not isinstance(train_set, Dataset): if not isinstance(train_set, Dataset):
raise TypeError("Traninig only accepts Dataset object") raise TypeError("Traninig only accepts Dataset object")
if is_str(init_model): if isinstance(init_model, string_type):
predictor = _InnerPredictor(model_file=init_model) predictor = _InnerPredictor(model_file=init_model)
elif isinstance(init_model, Booster): elif isinstance(init_model, Booster):
predictor = init_model._to_predictor() predictor = init_model._to_predictor()
...@@ -354,7 +345,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False, ...@@ -354,7 +345,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
if metrics: if metrics:
params.setdefault('metric', []) params.setdefault('metric', [])
if is_str(metrics): if isinstance(metrics, string_type):
params['metric'].append(metrics) params['metric'].append(metrics)
else: else:
params['metric'].extend(metrics) params['metric'].extend(metrics)
...@@ -373,7 +364,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False, ...@@ -373,7 +364,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
callbacks.add(callback.early_stopping(early_stopping_rounds, verbose=False)) callbacks.add(callback.early_stopping(early_stopping_rounds, verbose=False))
if verbose_eval is True: if verbose_eval is True:
callbacks.add(callback.print_evaluation(show_stdv=show_stdv)) callbacks.add(callback.print_evaluation(show_stdv=show_stdv))
elif isinstance(verbose_eval, int): elif isinstance(verbose_eval, integer_types):
callbacks.add(callback.print_evaluation(verbose_eval, show_stdv=show_stdv)) callbacks.add(callback.print_evaluation(verbose_eval, show_stdv=show_stdv))
callbacks_before_iter = {cb for cb in callbacks if getattr(cb, 'before_iteration', False)} callbacks_before_iter = {cb for cb in callbacks if getattr(cb, 'before_iteration', False)}
...@@ -381,7 +372,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False, ...@@ -381,7 +372,7 @@ def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
callbacks_before_iter = sorted(callbacks_before_iter, key=attrgetter('order')) callbacks_before_iter = sorted(callbacks_before_iter, key=attrgetter('order'))
callbacks_after_iter = sorted(callbacks_after_iter, key=attrgetter('order')) callbacks_after_iter = sorted(callbacks_after_iter, key=attrgetter('order'))
for i in range(num_boost_round): for i in range_(num_boost_round):
for cb in callbacks_before_iter: for cb in callbacks_before_iter:
cb(callback.CallbackEnv(model=cvfolds, cb(callback.CallbackEnv(model=cvfolds,
params=params, params=params,
......
...@@ -3,38 +3,14 @@ ...@@ -3,38 +3,14 @@
"""Scikit-Learn Wrapper interface for LightGBM.""" """Scikit-Learn Wrapper interface for LightGBM."""
from __future__ import absolute_import from __future__ import absolute_import
import inspect
import numpy as np import numpy as np
from .basic import IS_PY3, Dataset, LightGBMError from .basic import Dataset, LightGBMError
from .compat import (SKLEARN_INSTALLED, LGBMClassifierBase, LGBMDeprecated,
LGBMLabelEncoder, LGBMModelBase, LGBMRegressorBase, argc_,
range_)
from .engine import train from .engine import train
'''sklearn'''
try:
from sklearn.base import BaseEstimator
from sklearn.base import RegressorMixin, ClassifierMixin
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import deprecated
SKLEARN_INSTALLED = True
LGBMModelBase = BaseEstimator
LGBMRegressorBase = RegressorMixin
LGBMClassifierBase = ClassifierMixin
LGBMLabelEncoder = LabelEncoder
except ImportError:
SKLEARN_INSTALLED = False
LGBMModelBase = object
LGBMClassifierBase = object
LGBMRegressorBase = object
LGBMLabelEncoder = None
def _argc(func):
if IS_PY3:
return len(inspect.signature(func).parameters)
else:
return len(inspect.getargspec(func).args)
def _objective_function_wrapper(func): def _objective_function_wrapper(func):
"""Decorate an objective function """Decorate an objective function
...@@ -67,7 +43,7 @@ def _objective_function_wrapper(func): ...@@ -67,7 +43,7 @@ def _objective_function_wrapper(func):
def inner(preds, dataset): def inner(preds, dataset):
"""internal function""" """internal function"""
labels = dataset.get_label() labels = dataset.get_label()
argc = _argc(func) argc = argc_(func)
if argc == 2: if argc == 2:
grad, hess = func(labels, preds) grad, hess = func(labels, preds)
elif argc == 3: elif argc == 3:
...@@ -86,8 +62,8 @@ def _objective_function_wrapper(func): ...@@ -86,8 +62,8 @@ def _objective_function_wrapper(func):
num_class = len(grad) // num_data num_class = len(grad) // num_data
if num_class * num_data != len(grad): if num_class * num_data != len(grad):
raise ValueError("Length of grad and hess should equal to num_class * num_data") raise ValueError("Length of grad and hess should equal to num_class * num_data")
for k in range(num_class): for k in range_(num_class):
for i in range(num_data): for i in range_(num_data):
idx = k * num_data + i idx = k * num_data + i
grad[idx] *= weight[i] grad[idx] *= weight[i]
hess[idx] *= weight[i] hess[idx] *= weight[i]
...@@ -132,7 +108,7 @@ def _eval_function_wrapper(func): ...@@ -132,7 +108,7 @@ def _eval_function_wrapper(func):
def inner(preds, dataset): def inner(preds, dataset):
"""internal function""" """internal function"""
labels = dataset.get_label() labels = dataset.get_label()
argc = _argc(func) argc = argc_(func)
if argc == 2: if argc == 2:
return func(labels, preds) return func(labels, preds)
elif argc == 3: elif argc == 3:
...@@ -490,11 +466,11 @@ class LGBMModel(LGBMModelBase): ...@@ -490,11 +466,11 @@ class LGBMModel(LGBMModelBase):
importace_array = self.booster_.feature_importance().astype(np.float32) importace_array = self.booster_.feature_importance().astype(np.float32)
return importace_array / importace_array.sum() return importace_array / importace_array.sum()
@deprecated('Use attribute booster_ instead.') @LGBMDeprecated('Use attribute booster_ instead.')
def booster(self): def booster(self):
return self.booster_ return self.booster_
@deprecated('Use attribute feature_importance_ instead.') @LGBMDeprecated('Use attribute feature_importance_ instead.')
def feature_importance(self): def feature_importance(self):
return self.feature_importance_ return self.feature_importance_
...@@ -695,7 +671,7 @@ class LGBMRanker(LGBMModel): ...@@ -695,7 +671,7 @@ class LGBMRanker(LGBMModel):
raise ValueError("Eval_group cannot be None when eval_set is not None") raise ValueError("Eval_group cannot be None when eval_set is not None")
elif len(eval_group) != len(eval_set): elif len(eval_group) != len(eval_set):
raise ValueError("Length of eval_group should equal to eval_set") raise ValueError("Length of eval_group should equal to eval_set")
elif (isinstance(eval_group, dict) and any(i not in eval_group or eval_group[i] is None for i in range(len(eval_group)))) \ elif (isinstance(eval_group, dict) and any(i not in eval_group or eval_group[i] is None for i in range_(len(eval_group)))) \
or (isinstance(eval_group, list) and any(group is None for group in eval_group)): or (isinstance(eval_group, list) and any(group is None for group in eval_group)):
raise ValueError("Should set group for all eval dataset for ranking task; if you use dict, the index should start from 0") raise ValueError("Should set group for all eval dataset for ranking task; if you use dict, the index should start from 0")
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment