Commit ccf2570c authored by Nikita Titov, committed by Qiwei Ye
Browse files

[docs][ci][python] added docstring style test and fixed errors in existing docstrings (#1759)

* added docstring style test and fixed errors in existing docstrings

* hotfix

* hotfix

* fix grammar

* hotfix
parent dfdf8861
...@@ -45,8 +45,9 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "check-docs" ]]; then ...@@ -45,8 +45,9 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "check-docs" ]]; then
fi fi
if [[ $TASK == "pylint" ]]; then if [[ $TASK == "pylint" ]]; then
conda install -y -n $CONDA_ENV pycodestyle conda install -y -n $CONDA_ENV pycodestyle pydocstyle
pycodestyle --ignore=E501,W503 --exclude=./compute,./.nuget . || exit -1 pycodestyle --ignore=E501,W503 --exclude=./compute,./.nuget . || exit -1
pydocstyle --convention=numpy --add-ignore=D105 --match-dir="^(?!^compute|test|example).*" --match="(?!^test_|setup).*\.py" . || exit -1
exit 0 exit 0
fi fi
......
# coding: utf-8
"""Script for generating files with NuGet package metadata."""
import os import os
import sys import sys
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute. # documentation root, use os.path.abspath to make it absolute.
"""Sphinx configuration file."""
import datetime import datetime
import os import os
import sys import sys
...@@ -128,4 +128,11 @@ htmlhelp_basename = 'LightGBMdoc' ...@@ -128,4 +128,11 @@ htmlhelp_basename = 'LightGBMdoc'
def setup(app): def setup(app):
"""Add new elements at Sphinx initialization time.
Parameters
----------
app : object
The application object representing the Sphinx process.
"""
app.add_javascript("js/script.js") app.add_javascript("js/script.js")
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, C0111 # pylint: disable = invalid-name, C0111
''' """Comparison of `binary` and `xentropy` objectives.
BLUF: The `xentropy` objective does logistic regression and generalizes BLUF: The `xentropy` objective does logistic regression and generalizes
to the case where labels are probabilistic (i.e. numbers between 0 and 1). to the case where labels are probabilistic (i.e. numbers between 0 and 1).
...@@ -9,7 +10,7 @@ Details: Both `binary` and `xentropy` minimize the log loss and use ...@@ -9,7 +10,7 @@ Details: Both `binary` and `xentropy` minimize the log loss and use
between them with default settings is that `binary` may achieve a slight between them with default settings is that `binary` may achieve a slight
speed improvement by assuming that the labels are binary instead of speed improvement by assuming that the labels are binary instead of
probabilistic. probabilistic.
''' """
import time import time
...@@ -46,19 +47,28 @@ DATA = { ...@@ -46,19 +47,28 @@ DATA = {
################# #################
# Set up a couple of utilities for our experiments # Set up a couple of utilities for our experiments
def log_loss(preds, labels): def log_loss(preds, labels):
''' logarithmic loss with non-necessarily-binary labels ''' """Logarithmic loss with non-necessarily-binary labels."""
log_likelihood = np.sum(labels * np.log(preds)) / len(preds) log_likelihood = np.sum(labels * np.log(preds)) / len(preds)
return -log_likelihood return -log_likelihood
def experiment(objective, label_type, data): def experiment(objective, label_type, data):
''' """Measure performance of an objective.
Measure performance of an objective
:param objective: (str) 'binary' or 'xentropy' Parameters
:param label_type: (str) 'binary' or 'probability' ----------
:param data: DATA objective : string 'binary' or 'xentropy'
:return: dict with experiment summary stats Objective function.
''' label_type : string 'binary' or 'probability'
Type of the label.
data : dict
Data for training.
Returns
-------
result : dict
Experiment summary stats.
"""
np.random.seed(0) np.random.seed(0)
nrounds = 5 nrounds = 5
lgb_data = data['lgb_with_' + label_type + '_labels'] lgb_data = data['lgb_with_' + label_type + '_labels']
......
# coding: utf-8 # coding: utf-8
"""This script generates LightGBM/src/io/config_auto.cpp file """Helper script for generating config file and parameters list.
This script generates LightGBM/src/io/config_auto.cpp file
with list of all parameters, aliases table and other routines with list of all parameters, aliases table and other routines
along with parameters description in LightGBM/docs/Parameters.rst file along with parameters description in LightGBM/docs/Parameters.rst file
from the information in LightGBM/include/LightGBM/config.h file. from the information in LightGBM/include/LightGBM/config.h file.
...@@ -7,7 +9,19 @@ from the information in LightGBM/include/LightGBM/config.h file. ...@@ -7,7 +9,19 @@ from the information in LightGBM/include/LightGBM/config.h file.
import os import os
def GetParameterInfos(config_hpp): def get_parameter_infos(config_hpp):
"""Parse config header file.
Parameters
----------
config_hpp : string
Path to the config header file.
Returns
-------
infos : tuple
Tuple with names and content of sections.
"""
is_inparameter = False is_inparameter = False
parameter_group = None parameter_group = None
cur_key = None cur_key = None
...@@ -63,7 +77,19 @@ def GetParameterInfos(config_hpp): ...@@ -63,7 +77,19 @@ def GetParameterInfos(config_hpp):
return keys, member_infos return keys, member_infos
def GetNames(infos): def get_names(infos):
"""Get names of all parameters.
Parameters
----------
infos : list
Content of the config header file.
Returns
-------
names : list
Names of all parameters.
"""
names = [] names = []
for x in infos: for x in infos:
for y in x: for y in x:
...@@ -71,7 +97,19 @@ def GetNames(infos): ...@@ -71,7 +97,19 @@ def GetNames(infos):
return names return names
def GetAlias(infos): def get_alias(infos):
"""Get aliases of all parameters.
Parameters
----------
infos : list
Content of the config header file.
Returns
-------
pairs : list
List of tuples (param alias, param name).
"""
pairs = [] pairs = []
for x in infos: for x in infos:
for y in x: for y in x:
...@@ -83,7 +121,23 @@ def GetAlias(infos): ...@@ -83,7 +121,23 @@ def GetAlias(infos):
return pairs return pairs
def SetOneVarFromString(name, param_type, checks): def set_one_var_from_string(name, param_type, checks):
"""Construct code for auto config file for one param value.
Parameters
----------
name : string
Name of the parameter.
param_type : string
Type of the parameter.
checks : list
Constraints of the parameter.
Returns
-------
ret : string
Lines of auto config file with getting and checks of one parameter value.
"""
ret = "" ret = ""
univar_mapper = {"int": "GetInt", "double": "GetDouble", "bool": "GetBool", "std::string": "GetString"} univar_mapper = {"int": "GetInt", "double": "GetDouble", "bool": "GetBool", "std::string": "GetString"}
if "vector" not in param_type: if "vector" not in param_type:
...@@ -103,9 +157,33 @@ def SetOneVarFromString(name, param_type, checks): ...@@ -103,9 +157,33 @@ def SetOneVarFromString(name, param_type, checks):
return ret return ret
def GenParameterDescription(sections, descriptions, params_rst): def gen_parameter_description(sections, descriptions, params_rst):
"""Write descriptions of parameters to the documentation file.
Parameters
----------
sections : list
Names of parameters sections.
descriptions : list
Structured descriptions of parameters.
params_rst : string
Path to the file with parameters documentation.
"""
def parse_check(check, reverse=False): def parse_check(check, reverse=False):
"""Parse the constraint.
Parameters
----------
check : string
String representation of the constraint.
reverse : bool, optional (default=False)
Whether to reverse the sign of the constraint.
Returns
-------
pair : tuple
Parsed constraint in the form of tuple (value, sign).
"""
try: try:
idx = 1 idx = 1
float(check[idx:]) float(check[idx:])
...@@ -164,10 +242,24 @@ def GenParameterDescription(sections, descriptions, params_rst): ...@@ -164,10 +242,24 @@ def GenParameterDescription(sections, descriptions, params_rst):
new_params_file.write(after) new_params_file.write(after)
def GenParameterCode(config_hpp, config_out_cpp): def gen_parameter_code(config_hpp, config_out_cpp):
keys, infos = GetParameterInfos(config_hpp) """Generate auto config file.
names = GetNames(infos)
alias = GetAlias(infos) Parameters
----------
config_hpp : string
Path to the config header file.
config_out_cpp : string
Path to the auto config file.
Returns
-------
infos : tuple
Tuple with names and content of sections.
"""
keys, infos = get_parameter_infos(config_hpp)
names = get_names(infos)
alias = get_alias(infos)
str_to_write = "/// This file is auto generated by LightGBM\\helper\\parameter_generator.py from LightGBM\\include\\LightGBM\\config.h file.\n" str_to_write = "/// This file is auto generated by LightGBM\\helper\\parameter_generator.py from LightGBM\\include\\LightGBM\\config.h file.\n"
str_to_write += "#include<LightGBM/config.h>\nnamespace LightGBM {\n" str_to_write += "#include<LightGBM/config.h>\nnamespace LightGBM {\n"
# alias table # alias table
...@@ -192,7 +284,7 @@ def GenParameterCode(config_hpp, config_out_cpp): ...@@ -192,7 +284,7 @@ def GenParameterCode(config_hpp, config_out_cpp):
checks = [] checks = []
if "check" in y: if "check" in y:
checks = y["check"] checks = y["check"]
tmp = SetOneVarFromString(name, param_type, checks) tmp = set_one_var_from_string(name, param_type, checks)
str_to_write += tmp str_to_write += tmp
# tails # tails
str_to_write += "}\n\n" str_to_write += "}\n\n"
...@@ -226,5 +318,5 @@ if __name__ == "__main__": ...@@ -226,5 +318,5 @@ if __name__ == "__main__":
config_hpp = os.path.join(current_dir, os.path.pardir, 'include', 'LightGBM', 'config.h') config_hpp = os.path.join(current_dir, os.path.pardir, 'include', 'LightGBM', 'config.h')
config_out_cpp = os.path.join(current_dir, os.path.pardir, 'src', 'io', 'config_auto.cpp') config_out_cpp = os.path.join(current_dir, os.path.pardir, 'src', 'io', 'config_auto.cpp')
params_rst = os.path.join(current_dir, os.path.pardir, 'docs', 'Parameters.rst') params_rst = os.path.join(current_dir, os.path.pardir, 'docs', 'Parameters.rst')
sections, descriptions = GenParameterCode(config_hpp, config_out_cpp) sections, descriptions = gen_parameter_code(config_hpp, config_out_cpp)
GenParameterDescription(sections, descriptions, params_rst) gen_parameter_description(sections, descriptions, params_rst)
...@@ -151,8 +151,8 @@ Examples ...@@ -151,8 +151,8 @@ Examples
Refer to the walk through examples in `Python guide folder <https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide>`_. Refer to the walk through examples in `Python guide folder <https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide>`_.
Developments Development Guide
------------ -----------------
The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps/pep-0008/>`_. If you would like to make a contribution and are not familiar with PEP 8, please check the PEP 8 style guide first. Otherwise, the check won't pass. You should be careful about: The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps/pep-0008/>`_. If you would like to make a contribution and are not familiar with PEP 8, please check the PEP 8 style guide first. Otherwise, the check won't pass. You should be careful about:
...@@ -166,6 +166,8 @@ The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps ...@@ -166,6 +166,8 @@ The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps
E501 (line too long) and W503 (line break occurred before a binary operator) can be ignored. E501 (line too long) and W503 (line break occurred before a binary operator) can be ignored.
Documentation strings (docstrings) are written in the NumPy style.
.. |License| image:: https://img.shields.io/badge/license-MIT-blue.svg .. |License| image:: https://img.shields.io/badge/license-MIT-blue.svg
:target: https://github.com/Microsoft/LightGBM/blob/master/LICENSE :target: https://github.com/Microsoft/LightGBM/blob/master/LICENSE
.. |Python Versions| image:: https://img.shields.io/pypi/pyversions/lightgbm.svg .. |Python Versions| image:: https://img.shields.io/pypi/pyversions/lightgbm.svg
......
# coding: utf-8 # coding: utf-8
"""LightGBM, Light Gradient Boosting Machine. """LightGBM, Light Gradient Boosting Machine.
Contributors: https://github.com/Microsoft/LightGBM/graphs/contributors Contributors: https://github.com/Microsoft/LightGBM/graphs/contributors.
""" """
from __future__ import absolute_import from __future__ import absolute_import
......
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, C0111, C0301 # pylint: disable = invalid-name, C0111, C0301
# pylint: disable = R0912, R0913, R0914, W0105, W0201, W0212 # pylint: disable = R0912, R0913, R0914, W0105, W0201, W0212
"""Wrapper c_api of LightGBM""" """Wrapper for C API of LightGBM."""
from __future__ import absolute_import from __future__ import absolute_import
import copy import copy
...@@ -22,7 +22,7 @@ from .libpath import find_lib_path ...@@ -22,7 +22,7 @@ from .libpath import find_lib_path
def _load_lib(): def _load_lib():
"""Load LightGBM Library.""" """Load LightGBM library."""
lib_path = find_lib_path() lib_path = find_lib_path()
if len(lib_path) == 0: if len(lib_path) == 0:
return None return None
...@@ -35,18 +35,19 @@ _LIB = _load_lib() ...@@ -35,18 +35,19 @@ _LIB = _load_lib()
def _safe_call(ret): def _safe_call(ret):
"""Check the return value of C API call """Check the return value from C API call.
Parameters Parameters
---------- ----------
ret : int ret : int
return value from API calls The return value from C API calls.
""" """
if ret != 0: if ret != 0:
raise LightGBMError(decode_string(_LIB.LGBM_GetLastError())) raise LightGBMError(decode_string(_LIB.LGBM_GetLastError()))
def is_numeric(obj): def is_numeric(obj):
"""Check is a number or not, include numpy number etc.""" """Check whether object is a number or not, include numpy number, etc."""
try: try:
float(obj) float(obj)
return True return True
...@@ -57,18 +58,17 @@ def is_numeric(obj): ...@@ -57,18 +58,17 @@ def is_numeric(obj):
def is_numpy_1d_array(data): def is_numpy_1d_array(data):
"""Check is 1d numpy array""" """Check whether data is a 1-D numpy array."""
return isinstance(data, np.ndarray) and len(data.shape) == 1 return isinstance(data, np.ndarray) and len(data.shape) == 1
def is_1d_list(data): def is_1d_list(data):
"""Check is 1d list""" """Check whether data is a 1-D list."""
return isinstance(data, list) and \ return isinstance(data, list) and (not data or is_numeric(data[0]))
(not data or is_numeric(data[0]))
def list_to_1d_numpy(data, dtype=np.float32, name='list'): def list_to_1d_numpy(data, dtype=np.float32, name='list'):
"""convert to 1d numpy array""" """Convert data to 1-D numpy array."""
if is_numpy_1d_array(data): if is_numpy_1d_array(data):
if data.dtype == dtype: if data.dtype == dtype:
return data return data
...@@ -84,8 +84,7 @@ def list_to_1d_numpy(data, dtype=np.float32, name='list'): ...@@ -84,8 +84,7 @@ def list_to_1d_numpy(data, dtype=np.float32, name='list'):
def cfloat32_array_to_numpy(cptr, length): def cfloat32_array_to_numpy(cptr, length):
"""Convert a ctypes float pointer array to a numpy array. """Convert a ctypes float pointer array to a numpy array."""
"""
if isinstance(cptr, ctypes.POINTER(ctypes.c_float)): if isinstance(cptr, ctypes.POINTER(ctypes.c_float)):
return np.fromiter(cptr, dtype=np.float32, count=length) return np.fromiter(cptr, dtype=np.float32, count=length)
else: else:
...@@ -93,8 +92,7 @@ def cfloat32_array_to_numpy(cptr, length): ...@@ -93,8 +92,7 @@ def cfloat32_array_to_numpy(cptr, length):
def cfloat64_array_to_numpy(cptr, length): def cfloat64_array_to_numpy(cptr, length):
"""Convert a ctypes double pointer array to a numpy array. """Convert a ctypes double pointer array to a numpy array."""
"""
if isinstance(cptr, ctypes.POINTER(ctypes.c_double)): if isinstance(cptr, ctypes.POINTER(ctypes.c_double)):
return np.fromiter(cptr, dtype=np.float64, count=length) return np.fromiter(cptr, dtype=np.float64, count=length)
else: else:
...@@ -102,8 +100,7 @@ def cfloat64_array_to_numpy(cptr, length): ...@@ -102,8 +100,7 @@ def cfloat64_array_to_numpy(cptr, length):
def cint32_array_to_numpy(cptr, length): def cint32_array_to_numpy(cptr, length):
"""Convert a ctypes float pointer array to a numpy array. """Convert a ctypes int pointer array to a numpy array."""
"""
if isinstance(cptr, ctypes.POINTER(ctypes.c_int32)): if isinstance(cptr, ctypes.POINTER(ctypes.c_int32)):
return np.fromiter(cptr, dtype=np.int32, count=length) return np.fromiter(cptr, dtype=np.int32, count=length)
else: else:
...@@ -111,16 +108,17 @@ def cint32_array_to_numpy(cptr, length): ...@@ -111,16 +108,17 @@ def cint32_array_to_numpy(cptr, length):
def c_str(string): def c_str(string):
"""Convert a python string to cstring.""" """Convert a Python string to C string."""
return ctypes.c_char_p(string.encode('utf-8')) return ctypes.c_char_p(string.encode('utf-8'))
def c_array(ctype, values): def c_array(ctype, values):
"""Convert a python array to c array.""" """Convert a Python array to C array."""
return (ctype * len(values))(*values) return (ctype * len(values))(*values)
def param_dict_to_str(data): def param_dict_to_str(data):
"""Convert Python dictionary to string, which is passed to C API."""
if data is None or not data: if data is None or not data:
return "" return ""
pairs = [] pairs = []
...@@ -156,28 +154,29 @@ class _TempFile(object): ...@@ -156,28 +154,29 @@ class _TempFile(object):
class LightGBMError(Exception): class LightGBMError(Exception):
"""Error throwed by LightGBM""" """Error thrown by LightGBM."""
pass pass
MAX_INT32 = (1 << 31) - 1 MAX_INT32 = (1 << 31) - 1
"""marco definition of data type in c_api of LightGBM""" """Macro definition of data type in C API of LightGBM"""
C_API_DTYPE_FLOAT32 = 0 C_API_DTYPE_FLOAT32 = 0
C_API_DTYPE_FLOAT64 = 1 C_API_DTYPE_FLOAT64 = 1
C_API_DTYPE_INT32 = 2 C_API_DTYPE_INT32 = 2
C_API_DTYPE_INT64 = 3 C_API_DTYPE_INT64 = 3
"""Matric is row major in python""" """Matrix is row major in Python"""
C_API_IS_ROW_MAJOR = 1 C_API_IS_ROW_MAJOR = 1
"""marco definition of prediction type in c_api of LightGBM""" """Macro definition of prediction type in C API of LightGBM"""
C_API_PREDICT_NORMAL = 0 C_API_PREDICT_NORMAL = 0
C_API_PREDICT_RAW_SCORE = 1 C_API_PREDICT_RAW_SCORE = 1
C_API_PREDICT_LEAF_INDEX = 2 C_API_PREDICT_LEAF_INDEX = 2
C_API_PREDICT_CONTRIB = 3 C_API_PREDICT_CONTRIB = 3
"""data type of data field""" """Data type of data field"""
FIELD_TYPE_MAPPER = {"label": C_API_DTYPE_FLOAT32, FIELD_TYPE_MAPPER = {"label": C_API_DTYPE_FLOAT32,
"weight": C_API_DTYPE_FLOAT32, "weight": C_API_DTYPE_FLOAT32,
"init_score": C_API_DTYPE_FLOAT64, "init_score": C_API_DTYPE_FLOAT64,
...@@ -185,12 +184,12 @@ FIELD_TYPE_MAPPER = {"label": C_API_DTYPE_FLOAT32, ...@@ -185,12 +184,12 @@ FIELD_TYPE_MAPPER = {"label": C_API_DTYPE_FLOAT32,
PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int', PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int',
'int64': 'int', 'uint8': 'int', 'uint16': 'int', 'int64': 'int', 'uint8': 'int', 'uint16': 'int',
'uint32': 'int', 'uint64': 'int', 'float16': 'float', 'uint32': 'int', 'uint64': 'int', 'bool': 'int',
'float32': 'float', 'float64': 'float', 'bool': 'int'} 'float16': 'float', 'float32': 'float', 'float64': 'float'}
def convert_from_sliced_object(data): def convert_from_sliced_object(data):
"""fix the memory of multi-dimensional sliced object""" """Fix the memory of multi-dimensional sliced object."""
if data.base is not None and isinstance(data, np.ndarray) and isinstance(data.base, np.ndarray): if data.base is not None and isinstance(data, np.ndarray) and isinstance(data.base, np.ndarray):
if not data.flags.c_contiguous: if not data.flags.c_contiguous:
warnings.warn("Usage of np.ndarray subset (sliced data) is not recommended " warnings.warn("Usage of np.ndarray subset (sliced data) is not recommended "
...@@ -200,7 +199,7 @@ def convert_from_sliced_object(data): ...@@ -200,7 +199,7 @@ def convert_from_sliced_object(data):
def c_float_array(data): def c_float_array(data):
"""get pointer of float numpy array / list""" """Get pointer of float numpy array / list."""
if is_1d_list(data): if is_1d_list(data):
data = np.array(data, copy=False) data = np.array(data, copy=False)
if is_numpy_1d_array(data): if is_numpy_1d_array(data):
...@@ -221,7 +220,7 @@ def c_float_array(data): ...@@ -221,7 +220,7 @@ def c_float_array(data):
def c_int_array(data): def c_int_array(data):
"""get pointer of int numpy array / list""" """Get pointer of int numpy array / list."""
if is_1d_list(data): if is_1d_list(data):
data = np.array(data, copy=False) data = np.array(data, copy=False)
if is_numpy_1d_array(data): if is_numpy_1d_array(data):
...@@ -314,22 +313,27 @@ def _load_pandas_categorical(file_name): ...@@ -314,22 +313,27 @@ def _load_pandas_categorical(file_name):
class _InnerPredictor(object): class _InnerPredictor(object):
"""_InnerPredictor of LightGBM.
Not exposed to user.
Used only for prediction, usually used for continued training.
Note
----
Can be converted from Booster, but cannot be converted to Booster.
""" """
A _InnerPredictor of LightGBM.
Only used for prediction, usually used for continued-train
Note: Can convert from Booster, but cannot convert to Booster
"""
def __init__(self, model_file=None, booster_handle=None, pred_parameter=None): def __init__(self, model_file=None, booster_handle=None, pred_parameter=None):
"""Initialize the _InnerPredictor. Not exposed to user """Initialize the _InnerPredictor.
Parameters Parameters
---------- ----------
model_file : string model_file : string or None, optional (default=None)
Path to the model file. Path to the model file.
booster_handle : Handle of Booster booster_handle : object or None, optional (default=None)
use handle to init Handle of Booster.
pred_parameter: dict pred_parameter : dict or None, optional (default=None)
Other parameters for the prediciton Other parameters for the prediction.
""" """
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
self.__is_manage_handle = True self.__is_manage_handle = True
...@@ -382,30 +386,31 @@ class _InnerPredictor(object): ...@@ -382,30 +386,31 @@ class _InnerPredictor(object):
def predict(self, data, num_iteration=-1, def predict(self, data, num_iteration=-1,
raw_score=False, pred_leaf=False, pred_contrib=False, data_has_header=False, raw_score=False, pred_leaf=False, pred_contrib=False, data_has_header=False,
is_reshape=True): is_reshape=True):
""" """Predict logic.
Predict logic
Parameters Parameters
---------- ----------
data : string, numpy array, pandas DataFrame or scipy.sparse data : string, numpy array, pandas DataFrame or scipy.sparse
Data source for prediction Data source for prediction.
When data type is string, it represents the path of txt file When data type is string, it represents the path of txt file.
num_iteration : int num_iteration : int, optional (default=-1)
Used iteration for prediction Iteration used for prediction.
raw_score : bool raw_score : bool, optional (default=False)
True for predict raw score Whether to predict raw scores.
pred_leaf : bool pred_leaf : bool, optional (default=False)
True for predict leaf index Whether to predict leaf index.
pred_contrib : bool pred_contrib : bool, optional (default=False)
True for predict feature contributions Whether to predict feature contributions.
data_has_header : bool data_has_header : bool, optional (default=False)
Used for txt data, True if txt data has header Whether data has header.
is_reshape : bool Used only for txt data.
Reshape to (nrow, ncol) if true is_reshape : bool, optional (default=True)
Whether to reshape to (nrow, ncol).
Returns Returns
------- -------
Prediction result result : numpy array
Prediction result.
""" """
if isinstance(data, Dataset): if isinstance(data, Dataset):
raise TypeError("Cannot use Dataset instance for prediction, please use raw data instead") raise TypeError("Cannot use Dataset instance for prediction, please use raw data instead")
...@@ -465,9 +470,7 @@ class _InnerPredictor(object): ...@@ -465,9 +470,7 @@ class _InnerPredictor(object):
return preds return preds
def __get_num_preds(self, num_iteration, nrow, predict_type): def __get_num_preds(self, num_iteration, nrow, predict_type):
""" """Get size of prediction result."""
Get size of prediction result
"""
if nrow > MAX_INT32: if nrow > MAX_INT32:
raise LightGBMError('LightGBM cannot perform prediction for data' raise LightGBMError('LightGBM cannot perform prediction for data'
'with number of rows greater than MAX_INT32 (%d).\n' 'with number of rows greater than MAX_INT32 (%d).\n'
...@@ -483,9 +486,7 @@ class _InnerPredictor(object): ...@@ -483,9 +486,7 @@ class _InnerPredictor(object):
return n_preds.value return n_preds.value
def __pred_for_np2d(self, mat, num_iteration, predict_type): def __pred_for_np2d(self, mat, num_iteration, predict_type):
""" """Predict for a 2-D numpy matrix."""
Predict for a 2-D numpy matrix.
"""
if len(mat.shape) != 2: if len(mat.shape) != 2:
raise ValueError('Input numpy.ndarray or list must be 2 dimensional') raise ValueError('Input numpy.ndarray or list must be 2 dimensional')
...@@ -534,9 +535,7 @@ class _InnerPredictor(object): ...@@ -534,9 +535,7 @@ class _InnerPredictor(object):
return inner_predict(mat, num_iteration, predict_type) return inner_predict(mat, num_iteration, predict_type)
def __pred_for_csr(self, csr, num_iteration, predict_type): def __pred_for_csr(self, csr, num_iteration, predict_type):
""" """Predict for a CSR data."""
Predict for a csr data
"""
def inner_predict(csr, num_iteration, predict_type, preds=None): def inner_predict(csr, num_iteration, predict_type, preds=None):
nrow = len(csr.indptr) - 1 nrow = len(csr.indptr) - 1
n_preds = self.__get_num_preds(num_iteration, nrow, predict_type) n_preds = self.__get_num_preds(num_iteration, nrow, predict_type)
...@@ -587,9 +586,7 @@ class _InnerPredictor(object): ...@@ -587,9 +586,7 @@ class _InnerPredictor(object):
return inner_predict(csr, num_iteration, predict_type) return inner_predict(csr, num_iteration, predict_type)
def __pred_for_csc(self, csc, num_iteration, predict_type): def __pred_for_csc(self, csc, num_iteration, predict_type):
""" """Predict for a CSC data."""
Predict for a csc data
"""
nrow = csc.shape[0] nrow = csc.shape[0]
if nrow > MAX_INT32: if nrow > MAX_INT32:
return self.__pred_for_csr(csc.tocsr(), num_iteration, predict_type) return self.__pred_for_csr(csc.tocsr(), num_iteration, predict_type)
...@@ -625,18 +622,19 @@ class _InnerPredictor(object): ...@@ -625,18 +622,19 @@ class _InnerPredictor(object):
class Dataset(object): class Dataset(object):
"""Dataset in LightGBM.""" """Dataset in LightGBM."""
def __init__(self, data, label=None, reference=None, def __init__(self, data, label=None, reference=None,
weight=None, group=None, init_score=None, silent=False, weight=None, group=None, init_score=None, silent=False,
feature_name='auto', categorical_feature='auto', params=None, feature_name='auto', categorical_feature='auto', params=None,
free_raw_data=True): free_raw_data=True):
"""Construct Dataset. """Initialize Dataset.
Parameters Parameters
---------- ----------
data : string, numpy array, pandas DataFrame, scipy.sparse or list of numpy arrays data : string, numpy array, pandas DataFrame, scipy.sparse or list of numpy arrays
Data source of Dataset. Data source of Dataset.
If string, it represents the path to txt file. If string, it represents the path to txt file.
label : list, numpy 1-D array, pandas one-column DataFrame/Series or None, optional (default=None) label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None)
Label of the data. Label of the data.
reference : Dataset or None, optional (default=None) reference : Dataset or None, optional (default=None)
If this is Dataset for validation, training data should be used as reference. If this is Dataset for validation, training data should be used as reference.
...@@ -660,7 +658,7 @@ class Dataset(object): ...@@ -660,7 +658,7 @@ class Dataset(object):
Large values could be memory consuming. Consider using consecutive integers starting from zero. Large values could be memory consuming. Consider using consecutive integers starting from zero.
All negative values in categorical features will be treated as missing values. All negative values in categorical features will be treated as missing values.
params : dict or None, optional (default=None) params : dict or None, optional (default=None)
Other parameters. Other parameters for Dataset.
free_raw_data : bool, optional (default=True) free_raw_data : bool, optional (default=True)
If True, raw data is freed after constructing inner Dataset. If True, raw data is freed after constructing inner Dataset.
""" """
...@@ -810,9 +808,7 @@ class Dataset(object): ...@@ -810,9 +808,7 @@ class Dataset(object):
return self.set_feature_name(feature_name) return self.set_feature_name(feature_name)
def __init_from_np2d(self, mat, params_str, ref_dataset): def __init_from_np2d(self, mat, params_str, ref_dataset):
""" """Initialize data from a 2-D numpy matrix."""
Initialize data from a 2-D numpy matrix.
"""
if len(mat.shape) != 2: if len(mat.shape) != 2:
raise ValueError('Input numpy.ndarray must be 2 dimensional') raise ValueError('Input numpy.ndarray must be 2 dimensional')
...@@ -836,9 +832,7 @@ class Dataset(object): ...@@ -836,9 +832,7 @@ class Dataset(object):
return self return self
def __init_from_list_np2d(self, mats, params_str, ref_dataset): def __init_from_list_np2d(self, mats, params_str, ref_dataset):
""" """Initialize data from a list of 2-D numpy matrices."""
Initialize data from list of 2-D numpy matrices.
"""
ncol = mats[0].shape[1] ncol = mats[0].shape[1]
nrow = np.zeros((len(mats),), np.int32) nrow = np.zeros((len(mats),), np.int32)
if mats[0].dtype == np.float64: if mats[0].dtype == np.float64:
...@@ -885,9 +879,7 @@ class Dataset(object): ...@@ -885,9 +879,7 @@ class Dataset(object):
return self return self
def __init_from_csr(self, csr, params_str, ref_dataset): def __init_from_csr(self, csr, params_str, ref_dataset):
""" """Initialize data from a CSR matrix."""
Initialize data from a CSR matrix.
"""
if len(csr.indices) != len(csr.data): if len(csr.indices) != len(csr.data):
raise ValueError('Length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data))) raise ValueError('Length mismatch: {} vs {}'.format(len(csr.indices), len(csr.data)))
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
...@@ -913,9 +905,7 @@ class Dataset(object): ...@@ -913,9 +905,7 @@ class Dataset(object):
return self return self
def __init_from_csc(self, csc, params_str, ref_dataset): def __init_from_csc(self, csc, params_str, ref_dataset):
""" """Initialize data from a CSC matrix."""
Initialize data from a csc matrix.
"""
if len(csc.indices) != len(csc.data): if len(csc.indices) != len(csc.data):
raise ValueError('Length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data))) raise ValueError('Length mismatch: {} vs {}'.format(len(csc.indices), len(csc.data)))
self.handle = ctypes.c_void_p() self.handle = ctypes.c_void_p()
...@@ -996,7 +986,7 @@ class Dataset(object): ...@@ -996,7 +986,7 @@ class Dataset(object):
data : string, numpy array, pandas DataFrame, scipy.sparse or list of numpy arrays data : string, numpy array, pandas DataFrame, scipy.sparse or list of numpy arrays
Data source of Dataset. Data source of Dataset.
If string, it represents the path to txt file. If string, it represents the path to txt file.
label : list, numpy 1-D array, pandas one-column DataFrame/Series or None, optional (default=None) label : list, numpy 1-D array, pandas Series / one-column DataFrame or None, optional (default=None)
Label of the data. Label of the data.
weight : list, numpy 1-D array, pandas Series or None, optional (default=None) weight : list, numpy 1-D array, pandas Series or None, optional (default=None)
Weight for each instance. Weight for each instance.
...@@ -1007,7 +997,7 @@ class Dataset(object): ...@@ -1007,7 +997,7 @@ class Dataset(object):
silent : bool, optional (default=False) silent : bool, optional (default=False)
Whether to print messages during construction. Whether to print messages during construction.
params : dict or None, optional (default=None) params : dict or None, optional (default=None)
Other parameters. Other parameters for validation Dataset.
Returns Returns
------- -------
...@@ -1029,7 +1019,7 @@ class Dataset(object): ...@@ -1029,7 +1019,7 @@ class Dataset(object):
used_indices : list of int used_indices : list of int
Indices used to create the subset. Indices used to create the subset.
params : dict or None, optional (default=None) params : dict or None, optional (default=None)
Other parameters. These parameters will be passed to Dataset constructor.
Returns Returns
------- -------
...@@ -1193,9 +1183,10 @@ class Dataset(object): ...@@ -1193,9 +1183,10 @@ class Dataset(object):
"set free_raw_data=False when construct Dataset to avoid this.") "set free_raw_data=False when construct Dataset to avoid this.")
def _set_predictor(self, predictor): def _set_predictor(self, predictor):
""" """Set predictor for continued training.
Set predictor for continued training, not recommended for user to call this function.
Please set init_model in engine.train or engine.cv It is not recommended for user to call this function.
Please use init_model argument in engine.train() or engine.cv() instead.
""" """
if predictor is self._predictor: if predictor is self._predictor:
return self return self
...@@ -1259,11 +1250,11 @@ class Dataset(object): ...@@ -1259,11 +1250,11 @@ class Dataset(object):
return self return self
def set_label(self, label): def set_label(self, label):
"""Set label of Dataset """Set label of Dataset.
Parameters Parameters
---------- ----------
label : list, numpy 1-D array, pandas one-column DataFrame/Series or None label : list, numpy 1-D array, pandas Series / one-column DataFrame or None
The label information to be set into Dataset. The label information to be set into Dataset.
Returns Returns
...@@ -1420,8 +1411,11 @@ class Dataset(object): ...@@ -1420,8 +1411,11 @@ class Dataset(object):
raise LightGBMError("Cannot get num_feature before construct dataset") raise LightGBMError("Cannot get num_feature before construct dataset")
def get_ref_chain(self, ref_limit=100): def get_ref_chain(self, ref_limit=100):
"""Get a chain of Dataset objects, starting with r, then going to r.reference if exists, """Get a chain of Dataset objects.
then to r.reference.reference, etc. until we hit ``ref_limit`` or a reference loop.
Starts with r, then goes to r.reference (if exists),
then to r.reference.reference, etc.
until we hit ``ref_limit`` or a reference loop.
Parameters Parameters
---------- ----------
...@@ -1449,6 +1443,7 @@ class Dataset(object): ...@@ -1449,6 +1443,7 @@ class Dataset(object):
class Booster(object): class Booster(object):
"""Booster in LightGBM.""" """Booster in LightGBM."""
def __init__(self, params=None, train_set=None, model_file=None, silent=False): def __init__(self, params=None, train_set=None, model_file=None, silent=False):
"""Initialize the Booster. """Initialize the Booster.
...@@ -1732,7 +1727,6 @@ class Booster(object): ...@@ -1732,7 +1727,6 @@ class Booster(object):
is_finished : bool is_finished : bool
Whether the update was successfully finished. Whether the update was successfully finished.
""" """
# need reset training data # need reset training data
if train_set is not None and train_set is not self.train_set: if train_set is not None and train_set is not self.train_set:
if not isinstance(train_set, Dataset): if not isinstance(train_set, Dataset):
...@@ -1762,18 +1756,19 @@ class Booster(object): ...@@ -1762,18 +1756,19 @@ class Booster(object):
return self.__boost(grad, hess) return self.__boost(grad, hess)
def __boost(self, grad, hess): def __boost(self, grad, hess):
""" """Boost Booster for one iteration with customized gradient statistics.
Boost Booster for one iteration with customized gradient statistics.
Note: For multi-class task, the score is group by class_id first, then group by row_id. Note
----
For multi-class task, the score is group by class_id first, then group by row_id.
If you want to get i-th row score in j-th class, the access way is score[j * num_data + i] If you want to get i-th row score in j-th class, the access way is score[j * num_data + i]
and you should group grad and hess in this way as well. and you should group grad and hess in this way as well.
Parameters Parameters
---------- ----------
grad : 1d numpy array or list grad : 1-D numpy array or 1-D list
The first order derivative (gradient). The first order derivative (gradient).
hess : 1d numpy or 1d list hess : 1-D numpy array or 1-D list
The second order derivative (Hessian). The second order derivative (Hessian).
Returns Returns
...@@ -1863,10 +1858,10 @@ class Booster(object): ...@@ -1863,10 +1858,10 @@ class Booster(object):
Name of the data. Name of the data.
feval : callable or None, optional (default=None) feval : callable or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Should accept two parameters: preds, train_data. Should accept two parameters: preds, train_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
For multi-class task, the preds is group by class_id first, then group by row_id. For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
Note: should return (eval_name, eval_result, is_higher_better) or list of such tuples.
Returns Returns
------- -------
...@@ -1897,10 +1892,10 @@ class Booster(object): ...@@ -1897,10 +1892,10 @@ class Booster(object):
---------- ----------
feval : callable or None, optional (default=None) feval : callable or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Should accept two parameters: preds, train_data. Should accept two parameters: preds, train_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
For multi-class task, the preds is group by class_id first, then group by row_id. For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
Note: should return (eval_name, eval_result, is_higher_better) or list of such tuples.
Returns Returns
------- -------
...@@ -1916,10 +1911,10 @@ class Booster(object): ...@@ -1916,10 +1911,10 @@ class Booster(object):
---------- ----------
feval : callable or None, optional (default=None) feval : callable or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Should accept two parameters: preds, train_data. Should accept two parameters: preds, train_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
For multi-class task, the preds is group by class_id first, then group by row_id. For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
Note: should return (eval_name, eval_result, is_higher_better) or list of such tuples.
Returns Returns
------- -------
...@@ -1964,10 +1959,10 @@ class Booster(object): ...@@ -1964,10 +1959,10 @@ class Booster(object):
Parameters Parameters
---------- ----------
start_iteration : int, optional (default=0) start_iteration : int, optional (default=0)
Index of the iteration that will start to shuffle. The first iteration that will be shuffled.
end_iteration : int, optional (default=-1) end_iteration : int, optional (default=-1)
The last iteration that will be shuffled. The last iteration that will be shuffled.
If <= 0, means the last iteration. If <= 0, means the last available iteration.
Returns Returns
------- -------
...@@ -2044,7 +2039,7 @@ class Booster(object): ...@@ -2044,7 +2039,7 @@ class Booster(object):
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ptr_string_buffer)) ptr_string_buffer))
actual_len = tmp_out_len.value actual_len = tmp_out_len.value
'''if buffer length is not long enough, re-allocate a buffer''' # if buffer length is not long enough, re-allocate a buffer
if actual_len > buffer_len: if actual_len > buffer_len:
string_buffer = ctypes.create_string_buffer(actual_len) string_buffer = ctypes.create_string_buffer(actual_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
...@@ -2088,7 +2083,7 @@ class Booster(object): ...@@ -2088,7 +2083,7 @@ class Booster(object):
ctypes.byref(tmp_out_len), ctypes.byref(tmp_out_len),
ptr_string_buffer)) ptr_string_buffer))
actual_len = tmp_out_len.value actual_len = tmp_out_len.value
'''if buffer length is not long enough, reallocate a buffer''' # if buffer length is not long enough, reallocate a buffer
if actual_len > buffer_len: if actual_len > buffer_len:
string_buffer = ctypes.create_string_buffer(actual_len) string_buffer = ctypes.create_string_buffer(actual_len)
ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)]) ptr_string_buffer = ctypes.c_char_p(*[ctypes.addressof(string_buffer)])
...@@ -2103,7 +2098,7 @@ class Booster(object): ...@@ -2103,7 +2098,7 @@ class Booster(object):
def predict(self, data, num_iteration=None, def predict(self, data, num_iteration=None,
raw_score=False, pred_leaf=False, pred_contrib=False, raw_score=False, pred_leaf=False, pred_contrib=False,
data_has_header=False, is_reshape=True, pred_parameter=None, **kwargs): data_has_header=False, is_reshape=True, **kwargs):
"""Make a prediction. """Make a prediction.
Parameters Parameters
...@@ -2133,7 +2128,8 @@ class Booster(object): ...@@ -2133,7 +2128,8 @@ class Booster(object):
Used only if data is string. Used only if data is string.
is_reshape : bool, optional (default=True) is_reshape : bool, optional (default=True)
If True, result is reshaped to [nrow, ncol]. If True, result is reshaped to [nrow, ncol].
**kwargs : other parameters for the prediction **kwargs
Other parameters for the prediction.
Returns Returns
------- -------
...@@ -2155,12 +2151,13 @@ class Booster(object): ...@@ -2155,12 +2151,13 @@ class Booster(object):
data : string, numpy array, pandas DataFrame or scipy.sparse data : string, numpy array, pandas DataFrame or scipy.sparse
Data source for refit. Data source for refit.
If string, it represents the path to txt file. If string, it represents the path to txt file.
label : list, numpy 1-D array or pandas one-column DataFrame/Series label : list, numpy 1-D array or pandas Series / one-column DataFrame
Label for refit. Label for refit.
decay_rate : float, optional (default=0.9) decay_rate : float, optional (default=0.9)
Decay rate of refit, Decay rate of refit,
will use ``leaf_output = decay_rate * old_leaf_output + (1.0 - decay_rate) * new_leaf_output`` to refit trees. will use ``leaf_output = decay_rate * old_leaf_output + (1.0 - decay_rate) * new_leaf_output`` to refit trees.
**kwargs : other parameters for refit **kwargs
Other parameters for refit.
These parameters will be passed to ``predict`` method. These parameters will be passed to ``predict`` method.
Returns Returns
...@@ -2214,7 +2211,7 @@ class Booster(object): ...@@ -2214,7 +2211,7 @@ class Booster(object):
return ret.value return ret.value
def _to_predictor(self, pred_parameter=None): def _to_predictor(self, pred_parameter=None):
"""Convert to predictor""" """Convert to predictor."""
predictor = _InnerPredictor(booster_handle=self.handle, pred_parameter=pred_parameter) predictor = _InnerPredictor(booster_handle=self.handle, pred_parameter=pred_parameter)
predictor.pandas_categorical = self.pandas_categorical predictor.pandas_categorical = self.pandas_categorical
return predictor return predictor
...@@ -2254,7 +2251,7 @@ class Booster(object): ...@@ -2254,7 +2251,7 @@ class Booster(object):
raise ValueError("Length of feature names doesn't equal with num_feature") raise ValueError("Length of feature names doesn't equal with num_feature")
return [string_buffers[i].value.decode() for i in range_(num_feature)] return [string_buffers[i].value.decode() for i in range_(num_feature)]
def feature_importance(self, importance_type='split', iteration=-1): def feature_importance(self, importance_type='split', iteration=None):
"""Get feature importances. """Get feature importances.
Parameters Parameters
...@@ -2263,12 +2260,18 @@ class Booster(object): ...@@ -2263,12 +2260,18 @@ class Booster(object):
How the importance is calculated. How the importance is calculated.
If "split", result contains numbers of times the feature is used in a model. If "split", result contains numbers of times the feature is used in a model.
If "gain", result contains total gains of splits which use the feature. If "gain", result contains total gains of splits which use the feature.
iteration : int or None, optional (default=None)
Limit number of iterations in the feature importance calculation.
If None, if the best iteration exists, it is used; otherwise, all trees are used.
If <= 0, all trees are used (no limits).
Returns Returns
------- -------
result : numpy array result : numpy array
Array with feature importances. Array with feature importances.
""" """
if iteration is None:
iteration = self.best_iteration
if importance_type == "split": if importance_type == "split":
importance_type_int = 0 importance_type_int = 0
elif importance_type == "gain": elif importance_type == "gain":
...@@ -2287,9 +2290,7 @@ class Booster(object): ...@@ -2287,9 +2290,7 @@ class Booster(object):
return result return result
def __inner_eval(self, data_name, data_idx, feval=None): def __inner_eval(self, data_name, data_idx, feval=None):
""" """Evaluate training or validation data."""
Evaluate training or validation data
"""
if data_idx >= self.__num_dataset: if data_idx >= self.__num_dataset:
raise ValueError("Data_idx should be smaller than number of dataset") raise ValueError("Data_idx should be smaller than number of dataset")
self.__get_eval_info() self.__get_eval_info()
...@@ -2322,9 +2323,7 @@ class Booster(object): ...@@ -2322,9 +2323,7 @@ class Booster(object):
return ret return ret
def __inner_predict(self, data_idx): def __inner_predict(self, data_idx):
""" """Predict for training and validation dataset."""
Predict for training and validation dataset
"""
if data_idx >= self.__num_dataset: if data_idx >= self.__num_dataset:
raise ValueError("Data_idx should be smaller than number of dataset") raise ValueError("Data_idx should be smaller than number of dataset")
if self.__inner_predict_buffer[data_idx] is None: if self.__inner_predict_buffer[data_idx] is None:
...@@ -2348,9 +2347,7 @@ class Booster(object): ...@@ -2348,9 +2347,7 @@ class Booster(object):
return self.__inner_predict_buffer[data_idx] return self.__inner_predict_buffer[data_idx]
def __get_eval_info(self): def __get_eval_info(self):
""" """Get inner evaluation count and names."""
Get inner evaluation count and names
"""
if self.__need_reload_eval_info: if self.__need_reload_eval_info:
self.__need_reload_eval_info = False self.__need_reload_eval_info = False
out_num_eval = ctypes.c_int(0) out_num_eval = ctypes.c_int(0)
...@@ -2392,7 +2389,7 @@ class Booster(object): ...@@ -2392,7 +2389,7 @@ class Booster(object):
return self.__attr.get(key, None) return self.__attr.get(key, None)
def set_attr(self, **kwargs): def set_attr(self, **kwargs):
"""Set the attribute of the Booster. """Set attributes to the Booster.
Parameters Parameters
---------- ----------
...@@ -2403,7 +2400,7 @@ class Booster(object): ...@@ -2403,7 +2400,7 @@ class Booster(object):
Returns Returns
------- -------
self : Booster self : Booster
Booster with set attribute. Booster with set attributes.
""" """
for key, value in kwargs.items(): for key, value in kwargs.items():
if value is not None: if value is not None:
......
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, W0105, C0301 # pylint: disable = invalid-name, W0105, C0301
"""Callbacks library."""
from __future__ import absolute_import from __future__ import absolute_import
import collections import collections
...@@ -9,14 +10,18 @@ from .compat import range_ ...@@ -9,14 +10,18 @@ from .compat import range_
class EarlyStopException(Exception): class EarlyStopException(Exception):
"""Exception of early stopping. """Exception of early stopping."""
def __init__(self, best_iteration, best_score):
"""Create early stopping exception.
Parameters Parameters
---------- ----------
best_iteration : int best_iteration : int
The best iteration stopped. The best iteration stopped.
best_score : float
The score of the best iteration.
""" """
def __init__(self, best_iteration, best_score):
super(EarlyStopException, self).__init__() super(EarlyStopException, self).__init__()
self.best_iteration = best_iteration self.best_iteration = best_iteration
self.best_score = best_score self.best_score = best_score
...@@ -34,7 +39,7 @@ CallbackEnv = collections.namedtuple( ...@@ -34,7 +39,7 @@ CallbackEnv = collections.namedtuple(
def _format_eval_result(value, show_stdv=True): def _format_eval_result(value, show_stdv=True):
"""format metric string""" """Format metric string."""
if len(value) == 4: if len(value) == 4:
return '%s\'s %s: %g' % (value[0], value[1], value[2]) return '%s\'s %s: %g' % (value[0], value[1], value[2])
elif len(value) == 5: elif len(value) == 5:
...@@ -61,13 +66,12 @@ def print_evaluation(period=1, show_stdv=True): ...@@ -61,13 +66,12 @@ def print_evaluation(period=1, show_stdv=True):
callback : function callback : function
The callback that prints the evaluation results every ``period`` iteration(s). The callback that prints the evaluation results every ``period`` iteration(s).
""" """
def callback(env): def _callback(env):
"""internal function"""
if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0: if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0:
result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list]) result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list])
print('[%d]\t%s' % (env.iteration + 1, result)) print('[%d]\t%s' % (env.iteration + 1, result))
callback.order = 10 _callback.order = 10
return callback return _callback
def record_evaluation(eval_result): def record_evaluation(eval_result):
...@@ -87,19 +91,17 @@ def record_evaluation(eval_result): ...@@ -87,19 +91,17 @@ def record_evaluation(eval_result):
raise TypeError('Eval_result should be a dictionary') raise TypeError('Eval_result should be a dictionary')
eval_result.clear() eval_result.clear()
def init(env): def _init(env):
"""internal function"""
for data_name, _, _, _ in env.evaluation_result_list: for data_name, _, _, _ in env.evaluation_result_list:
eval_result.setdefault(data_name, collections.defaultdict(list)) eval_result.setdefault(data_name, collections.defaultdict(list))
def callback(env): def _callback(env):
"""internal function"""
if not eval_result: if not eval_result:
init(env) _init(env)
for data_name, eval_name, result, _ in env.evaluation_result_list: for data_name, eval_name, result, _ in env.evaluation_result_list:
eval_result[data_name][eval_name].append(result) eval_result[data_name][eval_name].append(result)
callback.order = 20 _callback.order = 20
return callback return _callback
def reset_parameter(**kwargs): def reset_parameter(**kwargs):
...@@ -111,7 +113,7 @@ def reset_parameter(**kwargs): ...@@ -111,7 +113,7 @@ def reset_parameter(**kwargs):
Parameters Parameters
---------- ----------
**kwargs: value should be list or function **kwargs : value should be list or function
List of parameters for each boosting round List of parameters for each boosting round
or a customized function that calculates the parameter in terms of or a customized function that calculates the parameter in terms of
current number of round (e.g. yields learning rate decay). current number of round (e.g. yields learning rate decay).
...@@ -123,8 +125,7 @@ def reset_parameter(**kwargs): ...@@ -123,8 +125,7 @@ def reset_parameter(**kwargs):
callback : function callback : function
The callback that resets the parameter after the first iteration. The callback that resets the parameter after the first iteration.
""" """
def callback(env): def _callback(env):
"""internal function"""
new_parameters = {} new_parameters = {}
for key, value in kwargs.items(): for key, value in kwargs.items():
if key in ['num_class', 'num_classes', if key in ['num_class', 'num_classes',
...@@ -143,9 +144,9 @@ def reset_parameter(**kwargs): ...@@ -143,9 +144,9 @@ def reset_parameter(**kwargs):
if new_parameters: if new_parameters:
env.model.reset_parameter(new_parameters) env.model.reset_parameter(new_parameters)
env.params.update(new_parameters) env.params.update(new_parameters)
callback.before_iteration = True _callback.before_iteration = True
callback.order = 10 _callback.order = 10
return callback return _callback
def early_stopping(stopping_rounds, verbose=True): def early_stopping(stopping_rounds, verbose=True):
...@@ -164,7 +165,6 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -164,7 +165,6 @@ def early_stopping(stopping_rounds, verbose=True):
---------- ----------
stopping_rounds : int stopping_rounds : int
The possible number of rounds without the trend occurrence. The possible number of rounds without the trend occurrence.
verbose : bool, optional (default=True) verbose : bool, optional (default=True)
Whether to print message with early stopping information. Whether to print message with early stopping information.
...@@ -178,8 +178,7 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -178,8 +178,7 @@ def early_stopping(stopping_rounds, verbose=True):
best_score_list = [] best_score_list = []
cmp_op = [] cmp_op = []
def init(env): def _init(env):
"""internal function"""
if not env.evaluation_result_list: if not env.evaluation_result_list:
raise ValueError('For early stopping, ' raise ValueError('For early stopping, '
'at least one dataset and eval metric is required for evaluation') 'at least one dataset and eval metric is required for evaluation')
...@@ -198,10 +197,9 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -198,10 +197,9 @@ def early_stopping(stopping_rounds, verbose=True):
best_score.append(float('inf')) best_score.append(float('inf'))
cmp_op.append(lt) cmp_op.append(lt)
def callback(env): def _callback(env):
"""internal function"""
if not cmp_op: if not cmp_op:
init(env) _init(env)
for i in range_(len(env.evaluation_result_list)): for i in range_(len(env.evaluation_result_list)):
score = env.evaluation_result_list[i][2] score = env.evaluation_result_list[i][2]
if cmp_op[i](score, best_score[i]): if cmp_op[i](score, best_score[i]):
...@@ -218,5 +216,5 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -218,5 +216,5 @@ def early_stopping(stopping_rounds, verbose=True):
print('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % ( print('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % (
best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]]))) best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
raise EarlyStopException(best_iter[i], best_score_list[i]) raise EarlyStopException(best_iter[i], best_score_list[i])
callback.order = 30 _callback.order = 30
return callback return _callback
# coding: utf-8 # coding: utf-8
# pylint: disable = C0103 # pylint: disable = C0103
"""Compatibility""" """Compatibility library."""
from __future__ import absolute_import from __future__ import absolute_import
import inspect import inspect
...@@ -10,7 +10,7 @@ import numpy as np ...@@ -10,7 +10,7 @@ import numpy as np
is_py3 = (sys.version_info[0] == 3) is_py3 = (sys.version_info[0] == 3)
"""compatibility between python2 and python3""" """Compatibility between Python2 and Python3"""
if is_py3: if is_py3:
zip_ = zip zip_ = zip
string_type = str string_type = str
...@@ -19,10 +19,11 @@ if is_py3: ...@@ -19,10 +19,11 @@ if is_py3:
range_ = range range_ = range
def argc_(func): def argc_(func):
"""return number of arguments of a function""" """Count the number of arguments of a function."""
return len(inspect.signature(func).parameters) return len(inspect.signature(func).parameters)
def decode_string(bytestring): def decode_string(bytestring):
"""Decode C bytestring to ordinary string."""
return bytestring.decode('utf-8') return bytestring.decode('utf-8')
else: else:
from itertools import izip as zip_ from itertools import izip as zip_
...@@ -32,10 +33,11 @@ else: ...@@ -32,10 +33,11 @@ else:
range_ = xrange range_ = xrange
def argc_(func): def argc_(func):
"""return number of arguments of a function""" """Count the number of arguments of a function."""
return len(inspect.getargspec(func).args) return len(inspect.getargspec(func).args)
def decode_string(bytestring): def decode_string(bytestring):
"""Decode C bytestring to ordinary string."""
return bytestring return bytestring
"""json""" """json"""
...@@ -48,6 +50,7 @@ except (ImportError, SyntaxError): ...@@ -48,6 +50,7 @@ except (ImportError, SyntaxError):
def json_default_with_numpy(obj): def json_default_with_numpy(obj):
"""Convert numpy classes to JSON serializable objects."""
if isinstance(obj, (np.integer, np.floating, np.bool_)): if isinstance(obj, (np.integer, np.floating, np.bool_)):
return obj.item() return obj.item()
elif isinstance(obj, np.ndarray): elif isinstance(obj, np.ndarray):
...@@ -64,9 +67,13 @@ except ImportError: ...@@ -64,9 +67,13 @@ except ImportError:
PANDAS_INSTALLED = False PANDAS_INSTALLED = False
class Series(object): class Series(object):
"""Dummy class for pandas.Series."""
pass pass
class DataFrame(object): class DataFrame(object):
"""Dummy class for pandas.DataFrame."""
pass pass
"""matplotlib""" """matplotlib"""
...@@ -131,4 +138,6 @@ except ImportError: ...@@ -131,4 +138,6 @@ except ImportError:
# DeprecationWarning is not shown by default, so let's create our own with higher level # DeprecationWarning is not shown by default, so let's create our own with higher level
class LGBMDeprecationWarning(UserWarning): class LGBMDeprecationWarning(UserWarning):
"""Custom deprecation warning."""
pass pass
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, W0105 # pylint: disable = invalid-name, W0105
"""Training Library containing training routines of LightGBM.""" """Library with training routines of LightGBM."""
from __future__ import absolute_import from __future__ import absolute_import
import collections import collections
...@@ -30,21 +30,21 @@ def train(params, train_set, num_boost_round=100, ...@@ -30,21 +30,21 @@ def train(params, train_set, num_boost_round=100,
params : dict params : dict
Parameters for training. Parameters for training.
train_set : Dataset train_set : Dataset
Data to be trained. Data to be trained on.
num_boost_round: int, optional (default=100) num_boost_round : int, optional (default=100)
Number of boosting iterations. Number of boosting iterations.
valid_sets: list of Datasets or None, optional (default=None) valid_sets : list of Datasets or None, optional (default=None)
List of data to be evaluated during training. List of data to be evaluated on during training.
valid_names: list of string or None, optional (default=None) valid_names : list of strings or None, optional (default=None)
Names of ``valid_sets``. Names of ``valid_sets``.
fobj : callable or None, optional (default=None) fobj : callable or None, optional (default=None)
Customized objective function. Customized objective function.
feval : callable or None, optional (default=None) feval : callable or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Should accept two parameters: preds, train_data. Should accept two parameters: preds, train_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
For multi-class task, the preds is group by class_id first, then group by row_id. For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
Note: should return (eval_name, eval_result, is_higher_better) or list of such tuples.
To ignore the default metric corresponding to the used objective, To ignore the default metric corresponding to the used objective,
set the ``metric`` parameter to the string ``"None"`` in ``params``. set the ``metric`` parameter to the string ``"None"`` in ``params``.
init_model : string, Booster or None, optional (default=None) init_model : string, Booster or None, optional (default=None)
...@@ -60,23 +60,24 @@ def train(params, train_set, num_boost_round=100, ...@@ -60,23 +60,24 @@ def train(params, train_set, num_boost_round=100,
All values in categorical features should be less than int32 max value (2147483647). All values in categorical features should be less than int32 max value (2147483647).
Large values could be memory consuming. Consider using consecutive integers starting from zero. Large values could be memory consuming. Consider using consecutive integers starting from zero.
All negative values in categorical features will be treated as missing values. All negative values in categorical features will be treated as missing values.
early_stopping_rounds: int or None, optional (default=None) early_stopping_rounds : int or None, optional (default=None)
Activates early stopping. The model will train until the validation score stops improving. Activates early stopping. The model will train until the validation score stops improving.
Validation score needs to improve at least every ``early_stopping_rounds`` round(s) Validation score needs to improve at least every ``early_stopping_rounds`` round(s)
to continue training. to continue training.
Requires at least one validation data and one metric. Requires at least one validation data and one metric.
If there's more than one, will check all of them. But the training data is ignored anyway. If there's more than one, will check all of them. But the training data is ignored anyway.
If early stopping occurs, the model will add ``best_iteration`` field. If early stopping occurs, the model will add ``best_iteration`` field.
evals_result: dict or None, optional (default=None) evals_result : dict or None, optional (default=None)
This dictionary used to store all evaluation results of all the items in ``valid_sets``. This dictionary used to store all evaluation results of all the items in ``valid_sets``.
Example Example
------- -------
With a ``valid_sets`` = [valid_set, train_set], With a ``valid_sets`` = [valid_set, train_set],
``valid_names`` = ['eval', 'train'] ``valid_names`` = ['eval', 'train']
and a ``params`` = ('metric':'logloss') and a ``params`` = {'metric': 'logloss'}
returns: {'train': {'logloss': ['0.48253', '0.35953', ...]}, returns {'train': {'logloss': ['0.48253', '0.35953', ...]},
'eval': {'logloss': ['0.480385', '0.357756', ...]}}. 'eval': {'logloss': ['0.480385', '0.357756', ...]}}.
verbose_eval : bool or int, optional (default=True) verbose_eval : bool or int, optional (default=True)
Requires at least one validation data. Requires at least one validation data.
If True, the eval metric on the valid set is printed at each boosting stage. If True, the eval metric on the valid set is printed at each boosting stage.
...@@ -85,9 +86,10 @@ def train(params, train_set, num_boost_round=100, ...@@ -85,9 +86,10 @@ def train(params, train_set, num_boost_round=100,
Example Example
------- -------
With ``verbose_eval`` = 4 and at least one item in evals, With ``verbose_eval`` = 4 and at least one item in ``valid_sets``,
an evaluation metric is printed every 4 (instead of 1) boosting stages. an evaluation metric is printed every 4 (instead of 1) boosting stages.
learning_rates: list, callable or None, optional (default=None)
learning_rates : list, callable or None, optional (default=None)
List of learning rates for each boosting round List of learning rates for each boosting round
or a customized function that calculates ``learning_rate`` or a customized function that calculates ``learning_rate``
in terms of current number of round (e.g. yields learning rate decay). in terms of current number of round (e.g. yields learning rate decay).
...@@ -238,31 +240,30 @@ def train(params, train_set, num_boost_round=100, ...@@ -238,31 +240,30 @@ def train(params, train_set, num_boost_round=100,
return booster return booster
class CVBooster(object): class _CVBooster(object):
""""Auxiliary data struct to hold all boosters of CV.""" """Auxiliary data struct to hold all boosters of CV."""
def __init__(self): def __init__(self):
self.boosters = [] self.boosters = []
self.best_iteration = -1 self.best_iteration = -1
def append(self, booster): def append(self, booster):
"""add a booster to CVBooster""" """Add a booster to _CVBooster."""
self.boosters.append(booster) self.boosters.append(booster)
def __getattr__(self, name): def __getattr__(self, name):
"""redirect methods call of CVBooster""" """Redirect methods call of _CVBooster."""
def handlerFunction(*args, **kwargs): def handler_function(*args, **kwargs):
"""call methods with each booster, and concatenate their results""" """Call methods with each booster, and concatenate their results."""
ret = [] ret = []
for booster in self.boosters: for booster in self.boosters:
ret.append(getattr(booster, name)(*args, **kwargs)) ret.append(getattr(booster, name)(*args, **kwargs))
return ret return ret
return handlerFunction return handler_function
def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True, shuffle=True): def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True, shuffle=True):
""" """Make a n-fold list of Booster from random indices."""
Make an n-fold list of Booster from random indices.
"""
full_data = full_data.construct() full_data = full_data.construct()
num_data = full_data.num_data() num_data = full_data.num_data()
if folds is not None: if folds is not None:
...@@ -301,7 +302,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -301,7 +302,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
train_id = [np.concatenate([test_id[i] for i in range_(nfold) if k != i]) for k in range_(nfold)] train_id = [np.concatenate([test_id[i] for i in range_(nfold) if k != i]) for k in range_(nfold)]
folds = zip_(train_id, test_id) folds = zip_(train_id, test_id)
ret = CVBooster() ret = _CVBooster()
for train_idx, test_idx in folds: for train_idx, test_idx in folds:
train_set = full_data.subset(train_idx) train_set = full_data.subset(train_idx)
valid_set = full_data.subset(test_idx) valid_set = full_data.subset(test_idx)
...@@ -317,9 +318,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -317,9 +318,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
def _agg_cv_result(raw_results): def _agg_cv_result(raw_results):
""" """Aggregate cross-validation results."""
Aggregate cross-validation results.
"""
cvmap = collections.defaultdict(list) cvmap = collections.defaultdict(list)
metric_type = {} metric_type = {}
for one_result in raw_results: for one_result in raw_results:
...@@ -356,7 +355,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -356,7 +355,7 @@ def cv(params, train_set, num_boost_round=100,
Number of folds in CV. Number of folds in CV.
stratified : bool, optional (default=True) stratified : bool, optional (default=True)
Whether to perform stratified sampling. Whether to perform stratified sampling.
shuffle: bool, optional (default=True) shuffle : bool, optional (default=True)
Whether to shuffle before splitting data. Whether to shuffle before splitting data.
metrics : string, list of strings or None, optional (default=None) metrics : string, list of strings or None, optional (default=None)
Evaluation metrics to be monitored during CV. Evaluation metrics to be monitored during CV.
...@@ -365,10 +364,10 @@ def cv(params, train_set, num_boost_round=100, ...@@ -365,10 +364,10 @@ def cv(params, train_set, num_boost_round=100,
Custom objective function. Custom objective function.
feval : callable or None, optional (default=None) feval : callable or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Should accept two parameters: preds, train_data. Should accept two parameters: preds, train_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
For multi-class task, the preds are grouped by class_id first, then grouped by row_id. For multi-class task, the preds are grouped by class_id first, then grouped by row_id.
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
Note: should return (eval_name, eval_result, is_higher_better) or list of such tuples.
To ignore the default metric corresponding to the used objective, To ignore the default metric corresponding to the used objective,
set ``metrics`` to the string ``"None"``. set ``metrics`` to the string ``"None"``.
init_model : string, Booster or None, optional (default=None) init_model : string, Booster or None, optional (default=None)
...@@ -384,12 +383,12 @@ def cv(params, train_set, num_boost_round=100, ...@@ -384,12 +383,12 @@ def cv(params, train_set, num_boost_round=100,
All values in categorical features should be less than int32 max value (2147483647). All values in categorical features should be less than int32 max value (2147483647).
Large values could be memory consuming. Consider using consecutive integers starting from zero. Large values could be memory consuming. Consider using consecutive integers starting from zero.
All negative values in categorical features will be treated as missing values. All negative values in categorical features will be treated as missing values.
early_stopping_rounds: int or None, optional (default=None) early_stopping_rounds : int or None, optional (default=None)
Activates early stopping. Activates early stopping.
CV score needs to improve at least every ``early_stopping_rounds`` round(s) CV score needs to improve at least every ``early_stopping_rounds`` round(s)
to continue. to continue.
Requires at least one metric. If there's more than one, will check all of them. Requires at least one metric. If there's more than one, will check all of them.
Last entry in evaluation history is the one from best iteration. Last entry in evaluation history is the one from the best iteration.
fpreproc : callable or None, optional (default=None) fpreproc : callable or None, optional (default=None)
Preprocessing function that takes (dtrain, dtest, params) Preprocessing function that takes (dtrain, dtest, params)
and returns transformed versions of those. and returns transformed versions of those.
...@@ -400,7 +399,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -400,7 +399,7 @@ def cv(params, train_set, num_boost_round=100,
If int, progress will be displayed at every given ``verbose_eval`` boosting stage. If int, progress will be displayed at every given ``verbose_eval`` boosting stage.
show_stdv : bool, optional (default=True) show_stdv : bool, optional (default=True)
Whether to display the standard deviation in progress. Whether to display the standard deviation in progress.
Results are not affected by this parameter, and always contains std. Results are not affected by this parameter, and always contain std.
seed : int, optional (default=0) seed : int, optional (default=0)
Seed used to generate the folds (passed to numpy.random.seed). Seed used to generate the folds (passed to numpy.random.seed).
callbacks : list of callables or None, optional (default=None) callbacks : list of callables or None, optional (default=None)
......
# coding: utf-8 # coding: utf-8
"""Find the path to lightgbm dynamic library files.""" """Find the path to LightGBM dynamic library files."""
import os import os
from platform import system from platform import system
...@@ -7,17 +7,19 @@ from platform import system ...@@ -7,17 +7,19 @@ from platform import system
def find_lib_path(): def find_lib_path():
"""Find the path to LightGBM library files. """Find the path to LightGBM library files.
Returns Returns
------- -------
lib_path: list(string) lib_path: list of strings
List of all found library path to LightGBM List of all found library paths to LightGBM.
""" """
if os.environ.get('LIGHTGBM_BUILD_DOC', False): if os.environ.get('LIGHTGBM_BUILD_DOC', False):
# we don't need lib_lightgbm while building docs # we don't need lib_lightgbm while building docs
return [] return []
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
dll_path = [curr_path, os.path.join(curr_path, '../../'), dll_path = [curr_path,
os.path.join(curr_path, '../../'),
os.path.join(curr_path, 'compile'), os.path.join(curr_path, 'compile'),
os.path.join(curr_path, '../compile'), os.path.join(curr_path, '../compile'),
os.path.join(curr_path, '../../lib/')] os.path.join(curr_path, '../../lib/')]
...@@ -32,5 +34,5 @@ def find_lib_path(): ...@@ -32,5 +34,5 @@ def find_lib_path():
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
if not lib_path: if not lib_path:
dll_path = [os.path.realpath(p) for p in dll_path] dll_path = [os.path.realpath(p) for p in dll_path]
raise Exception('Cannot find lightgbm library in following paths: ' + '\n'.join(dll_path)) raise Exception('Cannot find lightgbm library file in following paths:\n' + '\n'.join(dll_path))
return lib_path return lib_path
# coding: utf-8 # coding: utf-8
# pylint: disable = C0103 # pylint: disable = C0103
"""Plotting Library.""" """Plotting library."""
from __future__ import absolute_import from __future__ import absolute_import
import warnings import warnings
...@@ -15,8 +15,8 @@ from .compat import (MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED, LGBMDeprecationWa ...@@ -15,8 +15,8 @@ from .compat import (MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED, LGBMDeprecationWa
from .sklearn import LGBMModel from .sklearn import LGBMModel
def check_not_tuple_of_2_elements(obj, obj_name='obj'): def _check_not_tuple_of_2_elements(obj, obj_name='obj'):
"""check object is not tuple or does not have 2 elements""" """Check object is not tuple or does not have 2 elements."""
if not isinstance(obj, tuple) or len(obj) != 2: if not isinstance(obj, tuple) or len(obj) != 2:
raise TypeError('%s must be a tuple of 2 elements.' % obj_name) raise TypeError('%s must be a tuple of 2 elements.' % obj_name)
...@@ -63,7 +63,7 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -63,7 +63,7 @@ def plot_importance(booster, ax=None, height=0.2,
Figure size. Figure size.
grid : bool, optional (default=True) grid : bool, optional (default=True)
Whether to add a grid for axes. Whether to add a grid for axes.
**kwargs : other parameters **kwargs
Other parameters passed to ``ax.barh()``. Other parameters passed to ``ax.barh()``.
Returns Returns
...@@ -96,7 +96,7 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -96,7 +96,7 @@ def plot_importance(booster, ax=None, height=0.2,
if ax is None: if ax is None:
if figsize is not None: if figsize is not None:
check_not_tuple_of_2_elements(figsize, 'figsize') _check_not_tuple_of_2_elements(figsize, 'figsize')
_, ax = plt.subplots(1, 1, figsize=figsize) _, ax = plt.subplots(1, 1, figsize=figsize)
ylocs = np.arange(len(values)) ylocs = np.arange(len(values))
...@@ -109,13 +109,13 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -109,13 +109,13 @@ def plot_importance(booster, ax=None, height=0.2,
ax.set_yticklabels(labels) ax.set_yticklabels(labels)
if xlim is not None: if xlim is not None:
check_not_tuple_of_2_elements(xlim, 'xlim') _check_not_tuple_of_2_elements(xlim, 'xlim')
else: else:
xlim = (0, max(values) * 1.1) xlim = (0, max(values) * 1.1)
ax.set_xlim(xlim) ax.set_xlim(xlim)
if ylim is not None: if ylim is not None:
check_not_tuple_of_2_elements(ylim, 'ylim') _check_not_tuple_of_2_elements(ylim, 'ylim')
else: else:
ylim = (-1, len(values)) ylim = (-1, len(values))
ax.set_ylim(ylim) ax.set_ylim(ylim)
...@@ -194,7 +194,7 @@ def plot_metric(booster, metric=None, dataset_names=None, ...@@ -194,7 +194,7 @@ def plot_metric(booster, metric=None, dataset_names=None,
if ax is None: if ax is None:
if figsize is not None: if figsize is not None:
check_not_tuple_of_2_elements(figsize, 'figsize') _check_not_tuple_of_2_elements(figsize, 'figsize')
_, ax = plt.subplots(1, 1, figsize=figsize) _, ax = plt.subplots(1, 1, figsize=figsize)
if dataset_names is None: if dataset_names is None:
...@@ -229,13 +229,13 @@ def plot_metric(booster, metric=None, dataset_names=None, ...@@ -229,13 +229,13 @@ def plot_metric(booster, metric=None, dataset_names=None,
ax.legend(loc='best') ax.legend(loc='best')
if xlim is not None: if xlim is not None:
check_not_tuple_of_2_elements(xlim, 'xlim') _check_not_tuple_of_2_elements(xlim, 'xlim')
else: else:
xlim = (0, num_iteration) xlim = (0, num_iteration)
ax.set_xlim(xlim) ax.set_xlim(xlim)
if ylim is not None: if ylim is not None:
check_not_tuple_of_2_elements(ylim, 'ylim') _check_not_tuple_of_2_elements(ylim, 'ylim')
else: else:
range_result = max_result - min_result range_result = max_result - min_result
ylim = (min_result - range_result * 0.2, max_result + range_result * 0.2) ylim = (min_result - range_result * 0.2, max_result + range_result * 0.2)
...@@ -270,7 +270,7 @@ def _to_graphviz(tree_info, show_info, feature_names, precision=None, **kwargs): ...@@ -270,7 +270,7 @@ def _to_graphviz(tree_info, show_info, feature_names, precision=None, **kwargs):
if precision is not None and not isinstance(value, string_type) else str(value) if precision is not None and not isinstance(value, string_type) else str(value)
def add(root, parent=None, decision=None): def add(root, parent=None, decision=None):
"""recursively add node or edge""" """Recursively add node or edge."""
if 'split_index' in root: # non-leaf if 'split_index' in root: # non-leaf
name = 'split{0}'.format(root['split_index']) name = 'split{0}'.format(root['split_index'])
if feature_names is not None: if feature_names is not None:
...@@ -322,7 +322,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, ...@@ -322,7 +322,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None,
Parameters Parameters
---------- ----------
booster : Booster or LGBMModel booster : Booster or LGBMModel
Booster or LGBMModel instance. Booster or LGBMModel instance to be converted.
tree_index : int, optional (default=0) tree_index : int, optional (default=0)
The index of a target tree to convert. The index of a target tree to convert.
show_info : list of strings or None, optional (default=None) show_info : list of strings or None, optional (default=None)
...@@ -330,7 +330,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, ...@@ -330,7 +330,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None,
Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'. Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'.
precision : int or None, optional (default=None) precision : int or None, optional (default=None)
Used to restrict the display of floating point values to a certain precision. Used to restrict the display of floating point values to a certain precision.
**kwargs : other parameters **kwargs
Other parameters passed to ``Digraph`` constructor. Other parameters passed to ``Digraph`` constructor.
Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters. Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters.
...@@ -407,7 +407,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None, ...@@ -407,7 +407,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None,
Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'. Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'.
precision : int or None, optional (default=None) precision : int or None, optional (default=None)
Used to restrict the display of floating point values to a certain precision. Used to restrict the display of floating point values to a certain precision.
**kwargs : other parameters **kwargs
Other parameters passed to ``Digraph`` constructor. Other parameters passed to ``Digraph`` constructor.
Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters. Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters.
...@@ -433,7 +433,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None, ...@@ -433,7 +433,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None,
if ax is None: if ax is None:
if figsize is not None: if figsize is not None:
check_not_tuple_of_2_elements(figsize, 'figsize') _check_not_tuple_of_2_elements(figsize, 'figsize')
_, ax = plt.subplots(1, 1, figsize=figsize) _, ax = plt.subplots(1, 1, figsize=figsize)
graph = create_tree_digraph(booster=booster, tree_index=tree_index, graph = create_tree_digraph(booster=booster, tree_index=tree_index,
......
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, W0105, C0111, C0301 # pylint: disable = invalid-name, W0105, C0111, C0301
"""Scikit-Learn Wrapper interface for LightGBM.""" """Scikit-learn wrapper interface for LightGBM."""
from __future__ import absolute_import from __future__ import absolute_import
import numpy as np import numpy as np
...@@ -16,8 +16,11 @@ from .engine import train ...@@ -16,8 +16,11 @@ from .engine import train
def _objective_function_wrapper(func): def _objective_function_wrapper(func):
"""Decorate an objective function """Decorate an objective function.
Note: for multi-class task, the y_pred is group by class_id first, then group by row_id.
Note
----
For multi-class task, the y_pred is group by class_id first, then group by row_id.
If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i] If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i]
and you should group grad and hess in this way as well. and you should group grad and hess in this way as well.
...@@ -25,9 +28,10 @@ def _objective_function_wrapper(func): ...@@ -25,9 +28,10 @@ def _objective_function_wrapper(func):
---------- ----------
func : callable func : callable
Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group)``: Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group)``:
y_true : array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
group : array-like group : array-like
Group/query data, used for ranking task. Group/query data, used for ranking task.
...@@ -38,14 +42,13 @@ def _objective_function_wrapper(func): ...@@ -38,14 +42,13 @@ def _objective_function_wrapper(func):
The new objective function as expected by ``lightgbm.engine.train``. The new objective function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``: The signature is ``new_func(preds, dataset)``:
preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
dataset : ``dataset`` dataset : Dataset
The training set from which the labels will be extracted using The training set from which the labels will be extracted using ``dataset.get_label()``.
``dataset.get_label()``.
""" """
def inner(preds, dataset): def inner(preds, dataset):
"""internal function""" """Call passed function with appropriate arguments."""
labels = dataset.get_label() labels = dataset.get_label()
argc = argc_(func) argc = argc_(func)
if argc == 2: if argc == 2:
...@@ -76,24 +79,27 @@ def _objective_function_wrapper(func): ...@@ -76,24 +79,27 @@ def _objective_function_wrapper(func):
def _eval_function_wrapper(func): def _eval_function_wrapper(func):
"""Decorate an eval function """Decorate an eval function.
Note: for multi-class task, the y_pred is group by class_id first, then group by row_id.
Note
----
For multi-class task, the y_pred is group by class_id first, then group by row_id.
If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i]. If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i].
Parameters Parameters
---------- ----------
func : callable func : callable
Expects a callable with following functions: Expects a callable with following signatures:
``func(y_true, y_pred)``, ``func(y_true, y_pred)``,
``func(y_true, y_pred, weight)`` ``func(y_true, y_pred, weight)``
or ``func(y_true, y_pred, weight, group)`` or ``func(y_true, y_pred, weight, group)``
and return (eval_name->str, eval_result->float, is_bigger_better->Bool): and returns (eval_name->string, eval_result->float, is_bigger_better->bool):
y_true : array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
weight : array_like of shape = [n_samples] weight : array-like of shape = [n_samples]
The weight of samples. The weight of samples.
group : array-like group : array-like
Group/query data, used for ranking task. Group/query data, used for ranking task.
...@@ -104,14 +110,13 @@ def _eval_function_wrapper(func): ...@@ -104,14 +110,13 @@ def _eval_function_wrapper(func):
The new eval function as expected by ``lightgbm.engine.train``. The new eval function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``: The signature is ``new_func(preds, dataset)``:
preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
dataset : ``dataset`` dataset : Dataset
The training set from which the labels will be extracted using The training set from which the labels will be extracted using ``dataset.get_label()``.
``dataset.get_label()``.
""" """
def inner(preds, dataset): def inner(preds, dataset):
"""internal function""" """Call passed function with appropriate arguments."""
labels = dataset.get_label() labels = dataset.get_label()
argc = argc_(func) argc = argc_(func)
if argc == 2: if argc == 2:
...@@ -128,18 +133,18 @@ def _eval_function_wrapper(func): ...@@ -128,18 +133,18 @@ def _eval_function_wrapper(func):
class LGBMModel(_LGBMModelBase): class LGBMModel(_LGBMModelBase):
"""Implementation of the scikit-learn API for LightGBM.""" """Implementation of the scikit-learn API for LightGBM."""
def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1, def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=100, learning_rate=0.1, n_estimators=100,
subsample_for_bin=200000, objective=None, class_weight=None, subsample_for_bin=200000, objective=None, class_weight=None,
min_split_gain=0., min_child_weight=1e-3, min_child_samples=20, min_split_gain=0., min_child_weight=1e-3, min_child_samples=20,
subsample=1., subsample_freq=0, colsample_bytree=1., subsample=1., subsample_freq=0, colsample_bytree=1.,
reg_alpha=0., reg_lambda=0., random_state=None, reg_alpha=0., reg_lambda=0., random_state=None,
n_jobs=-1, silent=True, importance_type='split', **kwargs): n_jobs=-1, silent=True, importance_type='split', **kwargs):
"""Construct a gradient boosting model. r"""Construct a gradient boosting model.
Parameters Parameters
---------- ----------
boosting_type : string, optional (default="gbdt") boosting_type : string, optional (default='gbdt')
'gbdt', traditional Gradient Boosting Decision Tree. 'gbdt', traditional Gradient Boosting Decision Tree.
'dart', Dropouts meet Multiple Additive Regression Trees. 'dart', Dropouts meet Multiple Additive Regression Trees.
'goss', Gradient-based One-Side Sampling. 'goss', Gradient-based One-Side Sampling.
...@@ -168,14 +173,14 @@ class LGBMModel(_LGBMModelBase): ...@@ -168,14 +173,14 @@ class LGBMModel(_LGBMModelBase):
The 'balanced' mode uses the values of y to automatically adjust weights The 'balanced' mode uses the values of y to automatically adjust weights
inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``.
If None, all classes are supposed to have weight one. If None, all classes are supposed to have weight one.
Note that these weights will be multiplied with ``sample_weight`` (passed through the fit method) Note, that these weights will be multiplied with ``sample_weight`` (passed through the ``fit`` method)
if ``sample_weight`` is specified. if ``sample_weight`` is specified.
min_split_gain : float, optional (default=0.) min_split_gain : float, optional (default=0.)
Minimum loss reduction required to make a further partition on a leaf node of the tree. Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : float, optional (default=1e-3) min_child_weight : float, optional (default=1e-3)
Minimum sum of instance weight(hessian) needed in a child(leaf). Minimum sum of instance weight (hessian) needed in a child (leaf).
min_child_samples : int, optional (default=20) min_child_samples : int, optional (default=20)
Minimum number of data need in a child(leaf). Minimum number of data needed in a child (leaf).
subsample : float, optional (default=1.) subsample : float, optional (default=1.)
Subsample ratio of the training instance. Subsample ratio of the training instance.
subsample_freq : int, optional (default=0) subsample_freq : int, optional (default=0)
...@@ -195,14 +200,15 @@ class LGBMModel(_LGBMModelBase): ...@@ -195,14 +200,15 @@ class LGBMModel(_LGBMModelBase):
Whether to print messages while running boosting. Whether to print messages while running boosting.
importance_type : string, optional (default='split') importance_type : string, optional (default='split')
The type of feature importance to be filled into ``feature_importances_``. The type of feature importance to be filled into ``feature_importances_``.
If "split", result contains numbers of times the feature is used in a model. If 'split', result contains numbers of times the feature is used in a model.
If "gain", result contains total gains of splits which use the feature. If 'gain', result contains total gains of splits which use the feature.
**kwargs : other parameters **kwargs
Other parameters for the model.
Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters. Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters.
Note Note
---- ----
\\*\\*kwargs is not supported in sklearn, it may cause unexpected issues. \*\*kwargs is not supported in sklearn, it may cause unexpected issues.
Attributes Attributes
---------- ----------
...@@ -227,8 +233,8 @@ class LGBMModel(_LGBMModelBase): ...@@ -227,8 +233,8 @@ class LGBMModel(_LGBMModelBase):
Note Note
---- ----
A custom objective function can be provided for the ``objective`` A custom objective function can be provided for the ``objective`` parameter.
parameter. In this case, it should have the signature In this case, it should have the signature
``objective(y_true, y_pred) -> grad, hess`` or ``objective(y_true, y_pred) -> grad, hess`` or
``objective(y_true, y_pred, group) -> grad, hess``: ``objective(y_true, y_pred, group) -> grad, hess``:
...@@ -282,12 +288,37 @@ class LGBMModel(_LGBMModelBase): ...@@ -282,12 +288,37 @@ class LGBMModel(_LGBMModelBase):
self.set_params(**kwargs) self.set_params(**kwargs)
def get_params(self, deep=True): def get_params(self, deep=True):
"""Get parameters for this estimator.
Parameters
----------
deep : bool, optional (default=True)
If True, will return the parameters for this estimator and
contained subobjects that are estimators.
Returns
-------
params : dict
Parameter names mapped to their values.
"""
params = super(LGBMModel, self).get_params(deep=deep) params = super(LGBMModel, self).get_params(deep=deep)
params.update(self._other_params) params.update(self._other_params)
return params return params
# minor change to support `**kwargs` # minor change to support `**kwargs`
def set_params(self, **params): def set_params(self, **params):
"""Set the parameters of this estimator.
Parameters
----------
**params
Parameter names with their new values.
Returns
-------
self : object
Returns self.
"""
for key, value in params.items(): for key, value in params.items():
setattr(self, key, value) setattr(self, key, value)
if hasattr(self, '_' + key): if hasattr(self, '_' + key):
...@@ -340,10 +371,10 @@ class LGBMModel(_LGBMModelBase): ...@@ -340,10 +371,10 @@ class LGBMModel(_LGBMModelBase):
If there's more than one, will check all of them. But the training data is ignored anyway. If there's more than one, will check all of them. But the training data is ignored anyway.
verbose : bool, optional (default=True) verbose : bool, optional (default=True)
If True and an evaluation set is used, writes the evaluation progress. If True and an evaluation set is used, writes the evaluation progress.
feature_name : list of strings or 'auto', optional (default="auto") feature_name : list of strings or 'auto', optional (default='auto')
Feature names. Feature names.
If 'auto' and data is pandas DataFrame, data column names are used. If 'auto' and data is pandas DataFrame, data column names are used.
categorical_feature : list of strings or int, or 'auto', optional (default="auto") categorical_feature : list of strings or int, or 'auto', optional (default='auto')
Categorical features. Categorical features.
If list of int, interpreted as indices. If list of int, interpreted as indices.
If list of strings, interpreted as feature names (need to specify ``feature_name`` as well). If list of strings, interpreted as feature names (need to specify ``feature_name`` as well).
...@@ -362,15 +393,15 @@ class LGBMModel(_LGBMModelBase): ...@@ -362,15 +393,15 @@ class LGBMModel(_LGBMModelBase):
Note Note
---- ----
Custom eval function expects a callable with following functions: Custom eval function expects a callable with following signatures:
``func(y_true, y_pred)``, ``func(y_true, y_pred, weight)`` or ``func(y_true, y_pred)``, ``func(y_true, y_pred, weight)`` or
``func(y_true, y_pred, weight, group)``. ``func(y_true, y_pred, weight, group)``
Returns (eval_name, eval_result, is_bigger_better) or and returns (eval_name, eval_result, is_bigger_better) or
list of (eval_name, eval_result, is_bigger_better) list of (eval_name, eval_result, is_bigger_better):
y_true : array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
weight : array-like of shape = [n_samples] weight : array-like of shape = [n_samples]
The weight of samples. The weight of samples.
...@@ -539,7 +570,8 @@ class LGBMModel(_LGBMModelBase): ...@@ -539,7 +570,8 @@ class LGBMModel(_LGBMModelBase):
like SHAP interaction values, like SHAP interaction values,
you can install shap package (https://github.com/slundberg/shap). you can install shap package (https://github.com/slundberg/shap).
**kwargs : other parameters for the prediction **kwargs
Other parameters for the prediction.
Returns Returns
------- -------
...@@ -629,7 +661,7 @@ class LGBMRegressor(LGBMModel, _LGBMRegressorBase): ...@@ -629,7 +661,7 @@ class LGBMRegressor(LGBMModel, _LGBMRegressorBase):
eval_set=None, eval_names=None, eval_sample_weight=None, eval_set=None, eval_names=None, eval_sample_weight=None,
eval_init_score=None, eval_metric=None, early_stopping_rounds=None, eval_init_score=None, eval_metric=None, early_stopping_rounds=None,
verbose=True, feature_name='auto', categorical_feature='auto', callbacks=None): verbose=True, feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
super(LGBMRegressor, self).fit(X, y, sample_weight=sample_weight, super(LGBMRegressor, self).fit(X, y, sample_weight=sample_weight,
init_score=init_score, eval_set=eval_set, init_score=init_score, eval_set=eval_set,
eval_names=eval_names, eval_names=eval_names,
...@@ -656,6 +688,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -656,6 +688,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
eval_class_weight=None, eval_init_score=None, eval_metric=None, eval_class_weight=None, eval_init_score=None, eval_metric=None,
early_stopping_rounds=None, verbose=True, early_stopping_rounds=None, verbose=True,
feature_name='auto', categorical_feature='auto', callbacks=None): feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
_LGBMAssertAllFinite(y) _LGBMAssertAllFinite(y)
_LGBMCheckClassificationTargets(y) _LGBMCheckClassificationTargets(y)
self._le = _LGBMLabelEncoder().fit(y) self._le = _LGBMLabelEncoder().fit(y)
...@@ -704,6 +737,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -704,6 +737,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
def predict(self, X, raw_score=False, num_iteration=None, def predict(self, X, raw_score=False, num_iteration=None,
pred_leaf=False, pred_contrib=False, **kwargs): pred_leaf=False, pred_contrib=False, **kwargs):
"""Docstring is inherited from the LGBMModel."""
result = self.predict_proba(X, raw_score, num_iteration, result = self.predict_proba(X, raw_score, num_iteration,
pred_leaf, pred_contrib, **kwargs) pred_leaf, pred_contrib, **kwargs)
if raw_score or pred_leaf or pred_contrib: if raw_score or pred_leaf or pred_contrib:
...@@ -739,7 +773,8 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -739,7 +773,8 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
like SHAP interaction values, like SHAP interaction values,
you can install shap package (https://github.com/slundberg/shap). you can install shap package (https://github.com/slundberg/shap).
**kwargs : other parameters for the prediction **kwargs
Other parameters for the prediction.
Returns Returns
------- -------
...@@ -781,6 +816,7 @@ class LGBMRanker(LGBMModel): ...@@ -781,6 +816,7 @@ class LGBMRanker(LGBMModel):
eval_init_score=None, eval_group=None, eval_metric=None, eval_init_score=None, eval_group=None, eval_metric=None,
eval_at=[1], early_stopping_rounds=None, verbose=True, eval_at=[1], early_stopping_rounds=None, verbose=True,
feature_name='auto', categorical_feature='auto', callbacks=None): feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
# check group data # check group data
if group is None: if group is None:
raise ValueError("Should set group for ranking task") raise ValueError("Should set group for ranking task")
......
...@@ -16,7 +16,8 @@ def find_lib_path(): ...@@ -16,7 +16,8 @@ def find_lib_path():
return [] return []
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
dll_path = [curr_path, os.path.join(curr_path, '../../'), dll_path = [curr_path,
os.path.join(curr_path, '../../'),
os.path.join(curr_path, '../../python-package/lightgbm/compile'), os.path.join(curr_path, '../../python-package/lightgbm/compile'),
os.path.join(curr_path, '../../python-package/compile'), os.path.join(curr_path, '../../python-package/compile'),
os.path.join(curr_path, '../../lib/')] os.path.join(curr_path, '../../lib/')]
...@@ -31,7 +32,7 @@ def find_lib_path(): ...@@ -31,7 +32,7 @@ def find_lib_path():
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
if not lib_path: if not lib_path:
dll_path = [os.path.realpath(p) for p in dll_path] dll_path = [os.path.realpath(p) for p in dll_path]
raise Exception('Cannot find lightgbm library in following paths: ' + '\n'.join(dll_path)) raise Exception('Cannot find lightgbm library file in following paths:\n' + '\n'.join(dll_path))
return lib_path return lib_path
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment