"src/vscode:/vscode.git/clone" did not exist on "0551f77517d971ce9bc32e2633768245957b410a"
Commit ccf2570c authored by Nikita Titov's avatar Nikita Titov Committed by Qiwei Ye
Browse files

[docs][ci][python] added docstring style test and fixed errors in existing docstrings (#1759)

* added docstring style test and fixed errors in existing docstrings

* hotfix

* hotfix

* fix grammar

* hotfix
parent dfdf8861
...@@ -45,8 +45,9 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "check-docs" ]]; then ...@@ -45,8 +45,9 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "check-docs" ]]; then
fi fi
if [[ $TASK == "pylint" ]]; then if [[ $TASK == "pylint" ]]; then
conda install -y -n $CONDA_ENV pycodestyle conda install -y -n $CONDA_ENV pycodestyle pydocstyle
pycodestyle --ignore=E501,W503 --exclude=./compute,./.nuget . || exit -1 pycodestyle --ignore=E501,W503 --exclude=./compute,./.nuget . || exit -1
pydocstyle --convention=numpy --add-ignore=D105 --match-dir="^(?!^compute|test|example).*" --match="(?!^test_|setup).*\.py" . || exit -1
exit 0 exit 0
fi fi
......
# coding: utf-8
"""Script for generating files with NuGet package metadata."""
import os import os
import sys import sys
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute. # documentation root, use os.path.abspath to make it absolute.
"""Sphinx configuration file."""
import datetime import datetime
import os import os
import sys import sys
...@@ -128,4 +128,11 @@ htmlhelp_basename = 'LightGBMdoc' ...@@ -128,4 +128,11 @@ htmlhelp_basename = 'LightGBMdoc'
def setup(app): def setup(app):
"""Add new elements at Sphinx initialization time.
Parameters
----------
app : object
The application object representing the Sphinx process.
"""
app.add_javascript("js/script.js") app.add_javascript("js/script.js")
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, C0111 # pylint: disable = invalid-name, C0111
''' """Comparison of `binary` and `xentropy` objectives.
BLUF: The `xentropy` objective does logistic regression and generalizes BLUF: The `xentropy` objective does logistic regression and generalizes
to the case where labels are probabilistic (i.e. numbers between 0 and 1). to the case where labels are probabilistic (i.e. numbers between 0 and 1).
...@@ -9,7 +10,7 @@ Details: Both `binary` and `xentropy` minimize the log loss and use ...@@ -9,7 +10,7 @@ Details: Both `binary` and `xentropy` minimize the log loss and use
between them with default settings is that `binary` may achieve a slight between them with default settings is that `binary` may achieve a slight
speed improvement by assuming that the labels are binary instead of speed improvement by assuming that the labels are binary instead of
probabilistic. probabilistic.
''' """
import time import time
...@@ -46,19 +47,28 @@ DATA = { ...@@ -46,19 +47,28 @@ DATA = {
################# #################
# Set up a couple of utilities for our experiments # Set up a couple of utilities for our experiments
def log_loss(preds, labels):
    """Logarithmic loss with non-necessarily-binary labels.

    Parameters
    ----------
    preds : numpy.ndarray
        Predicted probabilities.
    labels : numpy.ndarray
        True labels; may be probabilistic (values in [0, 1]),
        not only hard 0/1 labels.

    Returns
    -------
    loss : float
        The negative mean log-likelihood.
    """
    log_likelihood = np.sum(labels * np.log(preds)) / len(preds)
    return -log_likelihood
def experiment(objective, label_type, data): def experiment(objective, label_type, data):
''' """Measure performance of an objective.
Measure performance of an objective
:param objective: (str) 'binary' or 'xentropy' Parameters
:param label_type: (str) 'binary' or 'probability' ----------
:param data: DATA objective : string 'binary' or 'xentropy'
:return: dict with experiment summary stats Objective function.
''' label_type : string 'binary' or 'probability'
Type of the label.
data : dict
Data for training.
Returns
-------
result : dict
Experiment summary stats.
"""
np.random.seed(0) np.random.seed(0)
nrounds = 5 nrounds = 5
lgb_data = data['lgb_with_' + label_type + '_labels'] lgb_data = data['lgb_with_' + label_type + '_labels']
......
# coding: utf-8 # coding: utf-8
"""This script generates LightGBM/src/io/config_auto.cpp file """Helper script for generating config file and parameters list.
This script generates LightGBM/src/io/config_auto.cpp file
with list of all parameters, aliases table and other routines with list of all parameters, aliases table and other routines
along with parameters description in LightGBM/docs/Parameters.rst file along with parameters description in LightGBM/docs/Parameters.rst file
from the information in LightGBM/include/LightGBM/config.h file. from the information in LightGBM/include/LightGBM/config.h file.
...@@ -7,7 +9,19 @@ from the information in LightGBM/include/LightGBM/config.h file. ...@@ -7,7 +9,19 @@ from the information in LightGBM/include/LightGBM/config.h file.
import os import os
def GetParameterInfos(config_hpp): def get_parameter_infos(config_hpp):
"""Parse config header file.
Parameters
----------
config_hpp : string
Path to the config header file.
Returns
-------
infos : tuple
Tuple with names and content of sections.
"""
is_inparameter = False is_inparameter = False
parameter_group = None parameter_group = None
cur_key = None cur_key = None
...@@ -63,7 +77,19 @@ def GetParameterInfos(config_hpp): ...@@ -63,7 +77,19 @@ def GetParameterInfos(config_hpp):
return keys, member_infos return keys, member_infos
def GetNames(infos): def get_names(infos):
"""Get names of all parameters.
Parameters
----------
infos : list
Content of the config header file.
Returns
-------
names : list
Names of all parameters.
"""
names = [] names = []
for x in infos: for x in infos:
for y in x: for y in x:
...@@ -71,7 +97,19 @@ def GetNames(infos): ...@@ -71,7 +97,19 @@ def GetNames(infos):
return names return names
def GetAlias(infos): def get_alias(infos):
"""Get aliases of all parameters.
Parameters
----------
infos : list
Content of the config header file.
Returns
-------
pairs : list
List of tuples (param alias, param name).
"""
pairs = [] pairs = []
for x in infos: for x in infos:
for y in x: for y in x:
...@@ -83,7 +121,23 @@ def GetAlias(infos): ...@@ -83,7 +121,23 @@ def GetAlias(infos):
return pairs return pairs
def SetOneVarFromString(name, param_type, checks): def set_one_var_from_string(name, param_type, checks):
"""Construct code for auto config file for one param value.
Parameters
----------
name : string
Name of the parameter.
param_type : string
Type of the parameter.
checks : list
Constraints of the parameter.
Returns
-------
ret : string
Lines of auto config file with getting and checks of one parameter value.
"""
ret = "" ret = ""
univar_mapper = {"int": "GetInt", "double": "GetDouble", "bool": "GetBool", "std::string": "GetString"} univar_mapper = {"int": "GetInt", "double": "GetDouble", "bool": "GetBool", "std::string": "GetString"}
if "vector" not in param_type: if "vector" not in param_type:
...@@ -103,9 +157,33 @@ def SetOneVarFromString(name, param_type, checks): ...@@ -103,9 +157,33 @@ def SetOneVarFromString(name, param_type, checks):
return ret return ret
def GenParameterDescription(sections, descriptions, params_rst): def gen_parameter_description(sections, descriptions, params_rst):
"""Write descriptions of parameters to the documentation file.
Parameters
----------
sections : list
Names of parameters sections.
descriptions : list
Structured descriptions of parameters.
params_rst : string
Path to the file with parameters documentation.
"""
def parse_check(check, reverse=False): def parse_check(check, reverse=False):
"""Parse the constraint.
Parameters
----------
check : string
String representation of the constraint.
reverse : bool, optional (default=False)
Whether to reverse the sign of the constraint.
Returns
-------
pair : tuple
Parsed constraint in the form of tuple (value, sign).
"""
try: try:
idx = 1 idx = 1
float(check[idx:]) float(check[idx:])
...@@ -164,10 +242,24 @@ def GenParameterDescription(sections, descriptions, params_rst): ...@@ -164,10 +242,24 @@ def GenParameterDescription(sections, descriptions, params_rst):
new_params_file.write(after) new_params_file.write(after)
def GenParameterCode(config_hpp, config_out_cpp): def gen_parameter_code(config_hpp, config_out_cpp):
keys, infos = GetParameterInfos(config_hpp) """Generate auto config file.
names = GetNames(infos)
alias = GetAlias(infos) Parameters
----------
config_hpp : string
Path to the config header file.
config_out_cpp : string
Path to the auto config file.
Returns
-------
infos : tuple
Tuple with names and content of sections.
"""
keys, infos = get_parameter_infos(config_hpp)
names = get_names(infos)
alias = get_alias(infos)
str_to_write = "/// This file is auto generated by LightGBM\\helper\\parameter_generator.py from LightGBM\\include\\LightGBM\\config.h file.\n" str_to_write = "/// This file is auto generated by LightGBM\\helper\\parameter_generator.py from LightGBM\\include\\LightGBM\\config.h file.\n"
str_to_write += "#include<LightGBM/config.h>\nnamespace LightGBM {\n" str_to_write += "#include<LightGBM/config.h>\nnamespace LightGBM {\n"
# alias table # alias table
...@@ -192,7 +284,7 @@ def GenParameterCode(config_hpp, config_out_cpp): ...@@ -192,7 +284,7 @@ def GenParameterCode(config_hpp, config_out_cpp):
checks = [] checks = []
if "check" in y: if "check" in y:
checks = y["check"] checks = y["check"]
tmp = SetOneVarFromString(name, param_type, checks) tmp = set_one_var_from_string(name, param_type, checks)
str_to_write += tmp str_to_write += tmp
# tails # tails
str_to_write += "}\n\n" str_to_write += "}\n\n"
...@@ -226,5 +318,5 @@ if __name__ == "__main__": ...@@ -226,5 +318,5 @@ if __name__ == "__main__":
config_hpp = os.path.join(current_dir, os.path.pardir, 'include', 'LightGBM', 'config.h') config_hpp = os.path.join(current_dir, os.path.pardir, 'include', 'LightGBM', 'config.h')
config_out_cpp = os.path.join(current_dir, os.path.pardir, 'src', 'io', 'config_auto.cpp') config_out_cpp = os.path.join(current_dir, os.path.pardir, 'src', 'io', 'config_auto.cpp')
params_rst = os.path.join(current_dir, os.path.pardir, 'docs', 'Parameters.rst') params_rst = os.path.join(current_dir, os.path.pardir, 'docs', 'Parameters.rst')
sections, descriptions = GenParameterCode(config_hpp, config_out_cpp) sections, descriptions = gen_parameter_code(config_hpp, config_out_cpp)
GenParameterDescription(sections, descriptions, params_rst) gen_parameter_description(sections, descriptions, params_rst)
...@@ -151,8 +151,8 @@ Examples ...@@ -151,8 +151,8 @@ Examples
Refer to the walk through examples in `Python guide folder <https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide>`_. Refer to the walk through examples in `Python guide folder <https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide>`_.
Developments Development Guide
------------ -----------------
The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps/pep-0008/>`_. If you would like to make a contribution and are not familiar with PEP 8, please check the PEP 8 style guide first. Otherwise, the check won't pass. You should be careful about:
...@@ -166,6 +166,8 @@ The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps ...@@ -166,6 +166,8 @@ The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps
E501 (line too long) and W503 (line break occurred before a binary operator) can be ignored. E501 (line too long) and W503 (line break occurred before a binary operator) can be ignored.
Documentation strings (docstrings) are written in the NumPy style.
.. |License| image:: https://img.shields.io/badge/license-MIT-blue.svg .. |License| image:: https://img.shields.io/badge/license-MIT-blue.svg
:target: https://github.com/Microsoft/LightGBM/blob/master/LICENSE :target: https://github.com/Microsoft/LightGBM/blob/master/LICENSE
.. |Python Versions| image:: https://img.shields.io/pypi/pyversions/lightgbm.svg .. |Python Versions| image:: https://img.shields.io/pypi/pyversions/lightgbm.svg
......
# coding: utf-8 # coding: utf-8
"""LightGBM, Light Gradient Boosting Machine. """LightGBM, Light Gradient Boosting Machine.
Contributors: https://github.com/Microsoft/LightGBM/graphs/contributors Contributors: https://github.com/Microsoft/LightGBM/graphs/contributors.
""" """
from __future__ import absolute_import from __future__ import absolute_import
......
This diff is collapsed.
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, W0105, C0301 # pylint: disable = invalid-name, W0105, C0301
"""Callbacks library."""
from __future__ import absolute_import from __future__ import absolute_import
import collections import collections
...@@ -9,14 +10,18 @@ from .compat import range_ ...@@ -9,14 +10,18 @@ from .compat import range_
class EarlyStopException(Exception):
    """Exception of early stopping."""

    def __init__(self, best_iteration, best_score):
        """Create early stopping exception.

        Parameters
        ----------
        best_iteration : int
            The best iteration stopped.
        best_score : float
            The score of the best iteration.
        """
        super(EarlyStopException, self).__init__()
        self.best_iteration = best_iteration
        self.best_score = best_score
...@@ -34,7 +39,7 @@ CallbackEnv = collections.namedtuple( ...@@ -34,7 +39,7 @@ CallbackEnv = collections.namedtuple(
def _format_eval_result(value, show_stdv=True): def _format_eval_result(value, show_stdv=True):
"""format metric string""" """Format metric string."""
if len(value) == 4: if len(value) == 4:
return '%s\'s %s: %g' % (value[0], value[1], value[2]) return '%s\'s %s: %g' % (value[0], value[1], value[2])
elif len(value) == 5: elif len(value) == 5:
...@@ -61,13 +66,12 @@ def print_evaluation(period=1, show_stdv=True): ...@@ -61,13 +66,12 @@ def print_evaluation(period=1, show_stdv=True):
callback : function callback : function
The callback that prints the evaluation results every ``period`` iteration(s). The callback that prints the evaluation results every ``period`` iteration(s).
""" """
def callback(env): def _callback(env):
"""internal function"""
if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0: if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0:
result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list]) result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list])
print('[%d]\t%s' % (env.iteration + 1, result)) print('[%d]\t%s' % (env.iteration + 1, result))
callback.order = 10 _callback.order = 10
return callback return _callback
def record_evaluation(eval_result): def record_evaluation(eval_result):
...@@ -87,19 +91,17 @@ def record_evaluation(eval_result): ...@@ -87,19 +91,17 @@ def record_evaluation(eval_result):
raise TypeError('Eval_result should be a dictionary') raise TypeError('Eval_result should be a dictionary')
eval_result.clear() eval_result.clear()
def init(env): def _init(env):
"""internal function"""
for data_name, _, _, _ in env.evaluation_result_list: for data_name, _, _, _ in env.evaluation_result_list:
eval_result.setdefault(data_name, collections.defaultdict(list)) eval_result.setdefault(data_name, collections.defaultdict(list))
def callback(env): def _callback(env):
"""internal function"""
if not eval_result: if not eval_result:
init(env) _init(env)
for data_name, eval_name, result, _ in env.evaluation_result_list: for data_name, eval_name, result, _ in env.evaluation_result_list:
eval_result[data_name][eval_name].append(result) eval_result[data_name][eval_name].append(result)
callback.order = 20 _callback.order = 20
return callback return _callback
def reset_parameter(**kwargs): def reset_parameter(**kwargs):
...@@ -111,7 +113,7 @@ def reset_parameter(**kwargs): ...@@ -111,7 +113,7 @@ def reset_parameter(**kwargs):
Parameters Parameters
---------- ----------
**kwargs: value should be list or function **kwargs : value should be list or function
List of parameters for each boosting round List of parameters for each boosting round
or a customized function that calculates the parameter in terms of or a customized function that calculates the parameter in terms of
current number of round (e.g. yields learning rate decay). current number of round (e.g. yields learning rate decay).
...@@ -123,8 +125,7 @@ def reset_parameter(**kwargs): ...@@ -123,8 +125,7 @@ def reset_parameter(**kwargs):
callback : function callback : function
The callback that resets the parameter after the first iteration. The callback that resets the parameter after the first iteration.
""" """
def callback(env): def _callback(env):
"""internal function"""
new_parameters = {} new_parameters = {}
for key, value in kwargs.items(): for key, value in kwargs.items():
if key in ['num_class', 'num_classes', if key in ['num_class', 'num_classes',
...@@ -143,9 +144,9 @@ def reset_parameter(**kwargs): ...@@ -143,9 +144,9 @@ def reset_parameter(**kwargs):
if new_parameters: if new_parameters:
env.model.reset_parameter(new_parameters) env.model.reset_parameter(new_parameters)
env.params.update(new_parameters) env.params.update(new_parameters)
callback.before_iteration = True _callback.before_iteration = True
callback.order = 10 _callback.order = 10
return callback return _callback
def early_stopping(stopping_rounds, verbose=True): def early_stopping(stopping_rounds, verbose=True):
...@@ -164,7 +165,6 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -164,7 +165,6 @@ def early_stopping(stopping_rounds, verbose=True):
---------- ----------
stopping_rounds : int stopping_rounds : int
The possible number of rounds without the trend occurrence. The possible number of rounds without the trend occurrence.
verbose : bool, optional (default=True) verbose : bool, optional (default=True)
Whether to print message with early stopping information. Whether to print message with early stopping information.
...@@ -178,8 +178,7 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -178,8 +178,7 @@ def early_stopping(stopping_rounds, verbose=True):
best_score_list = [] best_score_list = []
cmp_op = [] cmp_op = []
def init(env): def _init(env):
"""internal function"""
if not env.evaluation_result_list: if not env.evaluation_result_list:
raise ValueError('For early stopping, ' raise ValueError('For early stopping, '
'at least one dataset and eval metric is required for evaluation') 'at least one dataset and eval metric is required for evaluation')
...@@ -198,10 +197,9 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -198,10 +197,9 @@ def early_stopping(stopping_rounds, verbose=True):
best_score.append(float('inf')) best_score.append(float('inf'))
cmp_op.append(lt) cmp_op.append(lt)
def callback(env): def _callback(env):
"""internal function"""
if not cmp_op: if not cmp_op:
init(env) _init(env)
for i in range_(len(env.evaluation_result_list)): for i in range_(len(env.evaluation_result_list)):
score = env.evaluation_result_list[i][2] score = env.evaluation_result_list[i][2]
if cmp_op[i](score, best_score[i]): if cmp_op[i](score, best_score[i]):
...@@ -218,5 +216,5 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -218,5 +216,5 @@ def early_stopping(stopping_rounds, verbose=True):
print('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % ( print('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % (
best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]]))) best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
raise EarlyStopException(best_iter[i], best_score_list[i]) raise EarlyStopException(best_iter[i], best_score_list[i])
callback.order = 30 _callback.order = 30
return callback return _callback
# coding: utf-8 # coding: utf-8
# pylint: disable = C0103 # pylint: disable = C0103
"""Compatibility""" """Compatibility library."""
from __future__ import absolute_import from __future__ import absolute_import
import inspect import inspect
...@@ -10,7 +10,7 @@ import numpy as np ...@@ -10,7 +10,7 @@ import numpy as np
is_py3 = (sys.version_info[0] == 3) is_py3 = (sys.version_info[0] == 3)
"""compatibility between python2 and python3""" """Compatibility between Python2 and Python3"""
if is_py3: if is_py3:
zip_ = zip zip_ = zip
string_type = str string_type = str
...@@ -19,10 +19,11 @@ if is_py3: ...@@ -19,10 +19,11 @@ if is_py3:
range_ = range range_ = range
def argc_(func): def argc_(func):
"""return number of arguments of a function""" """Count the number of arguments of a function."""
return len(inspect.signature(func).parameters) return len(inspect.signature(func).parameters)
def decode_string(bytestring): def decode_string(bytestring):
"""Decode C bytestring to ordinary string."""
return bytestring.decode('utf-8') return bytestring.decode('utf-8')
else: else:
from itertools import izip as zip_ from itertools import izip as zip_
...@@ -32,10 +33,11 @@ else: ...@@ -32,10 +33,11 @@ else:
range_ = xrange range_ = xrange
def argc_(func): def argc_(func):
"""return number of arguments of a function""" """Count the number of arguments of a function."""
return len(inspect.getargspec(func).args) return len(inspect.getargspec(func).args)
def decode_string(bytestring): def decode_string(bytestring):
"""Decode C bytestring to ordinary string."""
return bytestring return bytestring
"""json""" """json"""
...@@ -48,6 +50,7 @@ except (ImportError, SyntaxError): ...@@ -48,6 +50,7 @@ except (ImportError, SyntaxError):
def json_default_with_numpy(obj): def json_default_with_numpy(obj):
"""Convert numpy classes to JSON serializable objects."""
if isinstance(obj, (np.integer, np.floating, np.bool_)): if isinstance(obj, (np.integer, np.floating, np.bool_)):
return obj.item() return obj.item()
elif isinstance(obj, np.ndarray): elif isinstance(obj, np.ndarray):
...@@ -64,9 +67,13 @@ except ImportError: ...@@ -64,9 +67,13 @@ except ImportError:
PANDAS_INSTALLED = False PANDAS_INSTALLED = False
class Series(object): class Series(object):
"""Dummy class for pandas.Series."""
pass pass
class DataFrame(object): class DataFrame(object):
"""Dummy class for pandas.DataFrame."""
pass pass
"""matplotlib""" """matplotlib"""
...@@ -131,4 +138,6 @@ except ImportError: ...@@ -131,4 +138,6 @@ except ImportError:
# DeprecationWarning is not shown by default, so let's create our own with higher level
class LGBMDeprecationWarning(UserWarning):
    """Custom deprecation warning."""

    pass
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, W0105 # pylint: disable = invalid-name, W0105
"""Training Library containing training routines of LightGBM.""" """Library with training routines of LightGBM."""
from __future__ import absolute_import from __future__ import absolute_import
import collections import collections
...@@ -30,21 +30,21 @@ def train(params, train_set, num_boost_round=100, ...@@ -30,21 +30,21 @@ def train(params, train_set, num_boost_round=100,
params : dict params : dict
Parameters for training. Parameters for training.
train_set : Dataset train_set : Dataset
Data to be trained. Data to be trained on.
num_boost_round: int, optional (default=100) num_boost_round : int, optional (default=100)
Number of boosting iterations. Number of boosting iterations.
valid_sets: list of Datasets or None, optional (default=None) valid_sets : list of Datasets or None, optional (default=None)
List of data to be evaluated during training. List of data to be evaluated on during training.
valid_names: list of string or None, optional (default=None) valid_names : list of strings or None, optional (default=None)
Names of ``valid_sets``. Names of ``valid_sets``.
fobj : callable or None, optional (default=None) fobj : callable or None, optional (default=None)
Customized objective function. Customized objective function.
feval : callable or None, optional (default=None) feval : callable or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Should accept two parameters: preds, train_data. Should accept two parameters: preds, train_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
For multi-class task, the preds is group by class_id first, then group by row_id. For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
Note: should return (eval_name, eval_result, is_higher_better) or list of such tuples.
To ignore the default metric corresponding to the used objective, To ignore the default metric corresponding to the used objective,
set the ``metric`` parameter to the string ``"None"`` in ``params``. set the ``metric`` parameter to the string ``"None"`` in ``params``.
init_model : string, Booster or None, optional (default=None) init_model : string, Booster or None, optional (default=None)
...@@ -60,23 +60,24 @@ def train(params, train_set, num_boost_round=100, ...@@ -60,23 +60,24 @@ def train(params, train_set, num_boost_round=100,
All values in categorical features should be less than int32 max value (2147483647). All values in categorical features should be less than int32 max value (2147483647).
Large values could be memory consuming. Consider using consecutive integers starting from zero. Large values could be memory consuming. Consider using consecutive integers starting from zero.
All negative values in categorical features will be treated as missing values. All negative values in categorical features will be treated as missing values.
early_stopping_rounds: int or None, optional (default=None) early_stopping_rounds : int or None, optional (default=None)
Activates early stopping. The model will train until the validation score stops improving. Activates early stopping. The model will train until the validation score stops improving.
Validation score needs to improve at least every ``early_stopping_rounds`` round(s) Validation score needs to improve at least every ``early_stopping_rounds`` round(s)
to continue training. to continue training.
Requires at least one validation data and one metric. Requires at least one validation data and one metric.
If there's more than one, will check all of them. But the training data is ignored anyway. If there's more than one, will check all of them. But the training data is ignored anyway.
If early stopping occurs, the model will add ``best_iteration`` field. If early stopping occurs, the model will add ``best_iteration`` field.
evals_result: dict or None, optional (default=None) evals_result : dict or None, optional (default=None)
This dictionary used to store all evaluation results of all the items in ``valid_sets``. This dictionary used to store all evaluation results of all the items in ``valid_sets``.
Example Example
------- -------
With a ``valid_sets`` = [valid_set, train_set], With a ``valid_sets`` = [valid_set, train_set],
``valid_names`` = ['eval', 'train'] ``valid_names`` = ['eval', 'train']
and a ``params`` = ('metric':'logloss') and a ``params`` = {'metric': 'logloss'}
returns: {'train': {'logloss': ['0.48253', '0.35953', ...]}, returns {'train': {'logloss': ['0.48253', '0.35953', ...]},
'eval': {'logloss': ['0.480385', '0.357756', ...]}}. 'eval': {'logloss': ['0.480385', '0.357756', ...]}}.
verbose_eval : bool or int, optional (default=True) verbose_eval : bool or int, optional (default=True)
Requires at least one validation data. Requires at least one validation data.
If True, the eval metric on the valid set is printed at each boosting stage. If True, the eval metric on the valid set is printed at each boosting stage.
...@@ -85,9 +86,10 @@ def train(params, train_set, num_boost_round=100, ...@@ -85,9 +86,10 @@ def train(params, train_set, num_boost_round=100,
Example Example
------- -------
With ``verbose_eval`` = 4 and at least one item in evals, With ``verbose_eval`` = 4 and at least one item in ``valid_sets``,
an evaluation metric is printed every 4 (instead of 1) boosting stages. an evaluation metric is printed every 4 (instead of 1) boosting stages.
learning_rates: list, callable or None, optional (default=None)
learning_rates : list, callable or None, optional (default=None)
List of learning rates for each boosting round List of learning rates for each boosting round
or a customized function that calculates ``learning_rate`` or a customized function that calculates ``learning_rate``
in terms of current number of round (e.g. yields learning rate decay). in terms of current number of round (e.g. yields learning rate decay).
...@@ -238,31 +240,30 @@ def train(params, train_set, num_boost_round=100, ...@@ -238,31 +240,30 @@ def train(params, train_set, num_boost_round=100,
return booster return booster
class CVBooster(object): class _CVBooster(object):
""""Auxiliary data struct to hold all boosters of CV.""" """Auxiliary data struct to hold all boosters of CV."""
def __init__(self): def __init__(self):
self.boosters = [] self.boosters = []
self.best_iteration = -1 self.best_iteration = -1
def append(self, booster): def append(self, booster):
"""add a booster to CVBooster""" """Add a booster to _CVBooster."""
self.boosters.append(booster) self.boosters.append(booster)
def __getattr__(self, name): def __getattr__(self, name):
"""redirect methods call of CVBooster""" """Redirect methods call of _CVBooster."""
def handlerFunction(*args, **kwargs): def handler_function(*args, **kwargs):
"""call methods with each booster, and concatenate their results""" """Call methods with each booster, and concatenate their results."""
ret = [] ret = []
for booster in self.boosters: for booster in self.boosters:
ret.append(getattr(booster, name)(*args, **kwargs)) ret.append(getattr(booster, name)(*args, **kwargs))
return ret return ret
return handlerFunction return handler_function
def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True, shuffle=True): def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True, shuffle=True):
""" """Make a n-fold list of Booster from random indices."""
Make an n-fold list of Booster from random indices.
"""
full_data = full_data.construct() full_data = full_data.construct()
num_data = full_data.num_data() num_data = full_data.num_data()
if folds is not None: if folds is not None:
...@@ -301,7 +302,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -301,7 +302,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
train_id = [np.concatenate([test_id[i] for i in range_(nfold) if k != i]) for k in range_(nfold)] train_id = [np.concatenate([test_id[i] for i in range_(nfold) if k != i]) for k in range_(nfold)]
folds = zip_(train_id, test_id) folds = zip_(train_id, test_id)
ret = CVBooster() ret = _CVBooster()
for train_idx, test_idx in folds: for train_idx, test_idx in folds:
train_set = full_data.subset(train_idx) train_set = full_data.subset(train_idx)
valid_set = full_data.subset(test_idx) valid_set = full_data.subset(test_idx)
...@@ -317,9 +318,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -317,9 +318,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
def _agg_cv_result(raw_results): def _agg_cv_result(raw_results):
""" """Aggregate cross-validation results."""
Aggregate cross-validation results.
"""
cvmap = collections.defaultdict(list) cvmap = collections.defaultdict(list)
metric_type = {} metric_type = {}
for one_result in raw_results: for one_result in raw_results:
...@@ -356,7 +355,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -356,7 +355,7 @@ def cv(params, train_set, num_boost_round=100,
Number of folds in CV. Number of folds in CV.
stratified : bool, optional (default=True) stratified : bool, optional (default=True)
Whether to perform stratified sampling. Whether to perform stratified sampling.
shuffle: bool, optional (default=True) shuffle : bool, optional (default=True)
Whether to shuffle before splitting data. Whether to shuffle before splitting data.
metrics : string, list of strings or None, optional (default=None) metrics : string, list of strings or None, optional (default=None)
Evaluation metrics to be monitored while CV. Evaluation metrics to be monitored while CV.
...@@ -365,10 +364,10 @@ def cv(params, train_set, num_boost_round=100, ...@@ -365,10 +364,10 @@ def cv(params, train_set, num_boost_round=100,
Custom objective function. Custom objective function.
feval : callable or None, optional (default=None) feval : callable or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Should accept two parameters: preds, train_data. Should accept two parameters: preds, train_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
For multi-class task, the preds is group by class_id first, then group by row_id. For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
Note: should return (eval_name, eval_result, is_higher_better) or list of such tuples.
To ignore the default metric corresponding to the used objective, To ignore the default metric corresponding to the used objective,
set ``metrics`` to the string ``"None"``. set ``metrics`` to the string ``"None"``.
init_model : string, Booster or None, optional (default=None) init_model : string, Booster or None, optional (default=None)
...@@ -384,12 +383,12 @@ def cv(params, train_set, num_boost_round=100, ...@@ -384,12 +383,12 @@ def cv(params, train_set, num_boost_round=100,
All values in categorical features should be less than int32 max value (2147483647). All values in categorical features should be less than int32 max value (2147483647).
Large values could be memory consuming. Consider using consecutive integers starting from zero. Large values could be memory consuming. Consider using consecutive integers starting from zero.
All negative values in categorical features will be treated as missing values. All negative values in categorical features will be treated as missing values.
early_stopping_rounds: int or None, optional (default=None) early_stopping_rounds : int or None, optional (default=None)
Activates early stopping. Activates early stopping.
CV score needs to improve at least every ``early_stopping_rounds`` round(s) CV score needs to improve at least every ``early_stopping_rounds`` round(s)
to continue. to continue.
Requires at least one metric. If there's more than one, will check all of them. Requires at least one metric. If there's more than one, will check all of them.
Last entry in evaluation history is the one from best iteration. Last entry in evaluation history is the one from the best iteration.
fpreproc : callable or None, optional (default=None) fpreproc : callable or None, optional (default=None)
Preprocessing function that takes (dtrain, dtest, params) Preprocessing function that takes (dtrain, dtest, params)
and returns transformed versions of those. and returns transformed versions of those.
...@@ -400,7 +399,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -400,7 +399,7 @@ def cv(params, train_set, num_boost_round=100,
If int, progress will be displayed at every given ``verbose_eval`` boosting stage. If int, progress will be displayed at every given ``verbose_eval`` boosting stage.
show_stdv : bool, optional (default=True) show_stdv : bool, optional (default=True)
Whether to display the standard deviation in progress. Whether to display the standard deviation in progress.
Results are not affected by this parameter, and always contains std. Results are not affected by this parameter, and always contain std.
seed : int, optional (default=0) seed : int, optional (default=0)
Seed used to generate the folds (passed to numpy.random.seed). Seed used to generate the folds (passed to numpy.random.seed).
callbacks : list of callables or None, optional (default=None) callbacks : list of callables or None, optional (default=None)
......
# coding: utf-8 # coding: utf-8
"""Find the path to lightgbm dynamic library files.""" """Find the path to LightGBM dynamic library files."""
import os import os
from platform import system from platform import system
...@@ -7,17 +7,19 @@ from platform import system ...@@ -7,17 +7,19 @@ from platform import system
def find_lib_path(): def find_lib_path():
"""Find the path to LightGBM library files. """Find the path to LightGBM library files.
Returns Returns
------- -------
lib_path: list(string) lib_path: list of strings
List of all found library path to LightGBM List of all found library paths to LightGBM.
""" """
if os.environ.get('LIGHTGBM_BUILD_DOC', False): if os.environ.get('LIGHTGBM_BUILD_DOC', False):
# we don't need lib_lightgbm while building docs # we don't need lib_lightgbm while building docs
return [] return []
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
dll_path = [curr_path, os.path.join(curr_path, '../../'), dll_path = [curr_path,
os.path.join(curr_path, '../../'),
os.path.join(curr_path, 'compile'), os.path.join(curr_path, 'compile'),
os.path.join(curr_path, '../compile'), os.path.join(curr_path, '../compile'),
os.path.join(curr_path, '../../lib/')] os.path.join(curr_path, '../../lib/')]
...@@ -32,5 +34,5 @@ def find_lib_path(): ...@@ -32,5 +34,5 @@ def find_lib_path():
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
if not lib_path: if not lib_path:
dll_path = [os.path.realpath(p) for p in dll_path] dll_path = [os.path.realpath(p) for p in dll_path]
raise Exception('Cannot find lightgbm library in following paths: ' + '\n'.join(dll_path)) raise Exception('Cannot find lightgbm library file in following paths:\n' + '\n'.join(dll_path))
return lib_path return lib_path
# coding: utf-8 # coding: utf-8
# pylint: disable = C0103 # pylint: disable = C0103
"""Plotting Library.""" """Plotting library."""
from __future__ import absolute_import from __future__ import absolute_import
import warnings import warnings
...@@ -15,8 +15,8 @@ from .compat import (MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED, LGBMDeprecationWa ...@@ -15,8 +15,8 @@ from .compat import (MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED, LGBMDeprecationWa
from .sklearn import LGBMModel from .sklearn import LGBMModel
def _check_not_tuple_of_2_elements(obj, obj_name='obj'):
    """Raise TypeError unless ``obj`` is a tuple of exactly 2 elements.

    Parameters
    ----------
    obj : object
        Object to validate.
    obj_name : string, optional (default='obj')
        Name used in the error message.
    """
    is_pair = isinstance(obj, tuple) and len(obj) == 2
    if not is_pair:
        raise TypeError('%s must be a tuple of 2 elements.' % obj_name)
...@@ -63,7 +63,7 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -63,7 +63,7 @@ def plot_importance(booster, ax=None, height=0.2,
Figure size. Figure size.
grid : bool, optional (default=True) grid : bool, optional (default=True)
Whether to add a grid for axes. Whether to add a grid for axes.
**kwargs : other parameters **kwargs
Other parameters passed to ``ax.barh()``. Other parameters passed to ``ax.barh()``.
Returns Returns
...@@ -96,7 +96,7 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -96,7 +96,7 @@ def plot_importance(booster, ax=None, height=0.2,
if ax is None: if ax is None:
if figsize is not None: if figsize is not None:
check_not_tuple_of_2_elements(figsize, 'figsize') _check_not_tuple_of_2_elements(figsize, 'figsize')
_, ax = plt.subplots(1, 1, figsize=figsize) _, ax = plt.subplots(1, 1, figsize=figsize)
ylocs = np.arange(len(values)) ylocs = np.arange(len(values))
...@@ -109,13 +109,13 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -109,13 +109,13 @@ def plot_importance(booster, ax=None, height=0.2,
ax.set_yticklabels(labels) ax.set_yticklabels(labels)
if xlim is not None: if xlim is not None:
check_not_tuple_of_2_elements(xlim, 'xlim') _check_not_tuple_of_2_elements(xlim, 'xlim')
else: else:
xlim = (0, max(values) * 1.1) xlim = (0, max(values) * 1.1)
ax.set_xlim(xlim) ax.set_xlim(xlim)
if ylim is not None: if ylim is not None:
check_not_tuple_of_2_elements(ylim, 'ylim') _check_not_tuple_of_2_elements(ylim, 'ylim')
else: else:
ylim = (-1, len(values)) ylim = (-1, len(values))
ax.set_ylim(ylim) ax.set_ylim(ylim)
...@@ -194,7 +194,7 @@ def plot_metric(booster, metric=None, dataset_names=None, ...@@ -194,7 +194,7 @@ def plot_metric(booster, metric=None, dataset_names=None,
if ax is None: if ax is None:
if figsize is not None: if figsize is not None:
check_not_tuple_of_2_elements(figsize, 'figsize') _check_not_tuple_of_2_elements(figsize, 'figsize')
_, ax = plt.subplots(1, 1, figsize=figsize) _, ax = plt.subplots(1, 1, figsize=figsize)
if dataset_names is None: if dataset_names is None:
...@@ -229,13 +229,13 @@ def plot_metric(booster, metric=None, dataset_names=None, ...@@ -229,13 +229,13 @@ def plot_metric(booster, metric=None, dataset_names=None,
ax.legend(loc='best') ax.legend(loc='best')
if xlim is not None: if xlim is not None:
check_not_tuple_of_2_elements(xlim, 'xlim') _check_not_tuple_of_2_elements(xlim, 'xlim')
else: else:
xlim = (0, num_iteration) xlim = (0, num_iteration)
ax.set_xlim(xlim) ax.set_xlim(xlim)
if ylim is not None: if ylim is not None:
check_not_tuple_of_2_elements(ylim, 'ylim') _check_not_tuple_of_2_elements(ylim, 'ylim')
else: else:
range_result = max_result - min_result range_result = max_result - min_result
ylim = (min_result - range_result * 0.2, max_result + range_result * 0.2) ylim = (min_result - range_result * 0.2, max_result + range_result * 0.2)
...@@ -270,7 +270,7 @@ def _to_graphviz(tree_info, show_info, feature_names, precision=None, **kwargs): ...@@ -270,7 +270,7 @@ def _to_graphviz(tree_info, show_info, feature_names, precision=None, **kwargs):
if precision is not None and not isinstance(value, string_type) else str(value) if precision is not None and not isinstance(value, string_type) else str(value)
def add(root, parent=None, decision=None): def add(root, parent=None, decision=None):
"""recursively add node or edge""" """Recursively add node or edge."""
if 'split_index' in root: # non-leaf if 'split_index' in root: # non-leaf
name = 'split{0}'.format(root['split_index']) name = 'split{0}'.format(root['split_index'])
if feature_names is not None: if feature_names is not None:
...@@ -322,7 +322,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, ...@@ -322,7 +322,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None,
Parameters Parameters
---------- ----------
booster : Booster or LGBMModel booster : Booster or LGBMModel
Booster or LGBMModel instance. Booster or LGBMModel instance to be converted.
tree_index : int, optional (default=0) tree_index : int, optional (default=0)
The index of a target tree to convert. The index of a target tree to convert.
show_info : list of strings or None, optional (default=None) show_info : list of strings or None, optional (default=None)
...@@ -330,7 +330,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, ...@@ -330,7 +330,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None,
Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'. Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'.
precision : int or None, optional (default=None) precision : int or None, optional (default=None)
Used to restrict the display of floating point values to a certain precision. Used to restrict the display of floating point values to a certain precision.
**kwargs : other parameters **kwargs
Other parameters passed to ``Digraph`` constructor. Other parameters passed to ``Digraph`` constructor.
Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters. Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters.
...@@ -407,7 +407,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None, ...@@ -407,7 +407,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None,
Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'. Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'.
precision : int or None, optional (default=None) precision : int or None, optional (default=None)
Used to restrict the display of floating point values to a certain precision. Used to restrict the display of floating point values to a certain precision.
**kwargs : other parameters **kwargs
Other parameters passed to ``Digraph`` constructor. Other parameters passed to ``Digraph`` constructor.
Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters. Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters.
...@@ -433,7 +433,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None, ...@@ -433,7 +433,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None,
if ax is None: if ax is None:
if figsize is not None: if figsize is not None:
check_not_tuple_of_2_elements(figsize, 'figsize') _check_not_tuple_of_2_elements(figsize, 'figsize')
_, ax = plt.subplots(1, 1, figsize=figsize) _, ax = plt.subplots(1, 1, figsize=figsize)
graph = create_tree_digraph(booster=booster, tree_index=tree_index, graph = create_tree_digraph(booster=booster, tree_index=tree_index,
......
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, W0105, C0111, C0301 # pylint: disable = invalid-name, W0105, C0111, C0301
"""Scikit-Learn Wrapper interface for LightGBM.""" """Scikit-learn wrapper interface for LightGBM."""
from __future__ import absolute_import from __future__ import absolute_import
import numpy as np import numpy as np
...@@ -16,8 +16,11 @@ from .engine import train ...@@ -16,8 +16,11 @@ from .engine import train
def _objective_function_wrapper(func): def _objective_function_wrapper(func):
"""Decorate an objective function """Decorate an objective function.
Note: for multi-class task, the y_pred is group by class_id first, then group by row_id.
Note
----
For multi-class task, the y_pred is group by class_id first, then group by row_id.
If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i] If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i]
and you should group grad and hess in this way as well. and you should group grad and hess in this way as well.
...@@ -25,9 +28,10 @@ def _objective_function_wrapper(func): ...@@ -25,9 +28,10 @@ def _objective_function_wrapper(func):
---------- ----------
func : callable func : callable
Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group): Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group):
y_true : array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
group : array-like group : array-like
Group/query data, used for ranking task. Group/query data, used for ranking task.
...@@ -38,14 +42,13 @@ def _objective_function_wrapper(func): ...@@ -38,14 +42,13 @@ def _objective_function_wrapper(func):
The new objective function as expected by ``lightgbm.engine.train``. The new objective function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``: The signature is ``new_func(preds, dataset)``:
preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
dataset : ``dataset`` dataset : Dataset
The training set from which the labels will be extracted using The training set from which the labels will be extracted using ``dataset.get_label()``.
``dataset.get_label()``.
""" """
def inner(preds, dataset): def inner(preds, dataset):
"""internal function""" """Call passed function with appropriate arguments."""
labels = dataset.get_label() labels = dataset.get_label()
argc = argc_(func) argc = argc_(func)
if argc == 2: if argc == 2:
...@@ -76,24 +79,27 @@ def _objective_function_wrapper(func): ...@@ -76,24 +79,27 @@ def _objective_function_wrapper(func):
def _eval_function_wrapper(func): def _eval_function_wrapper(func):
"""Decorate an eval function """Decorate an eval function.
Note: for multi-class task, the y_pred is group by class_id first, then group by row_id.
Note
----
For multi-class task, the y_pred is group by class_id first, then group by row_id.
If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i]. If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i].
Parameters Parameters
---------- ----------
func : callable func : callable
Expects a callable with following functions: Expects a callable with following signatures:
``func(y_true, y_pred)``, ``func(y_true, y_pred)``,
``func(y_true, y_pred, weight)`` ``func(y_true, y_pred, weight)``
or ``func(y_true, y_pred, weight, group)`` or ``func(y_true, y_pred, weight, group)``
and return (eval_name->str, eval_result->float, is_bigger_better->Bool): and returns (eval_name->string, eval_result->float, is_bigger_better->bool):
y_true : array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
weight : array_like of shape = [n_samples] weight : array-like of shape = [n_samples]
The weight of samples. The weight of samples.
group : array-like group : array-like
Group/query data, used for ranking task. Group/query data, used for ranking task.
...@@ -104,14 +110,13 @@ def _eval_function_wrapper(func): ...@@ -104,14 +110,13 @@ def _eval_function_wrapper(func):
The new eval function as expected by ``lightgbm.engine.train``. The new eval function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``: The signature is ``new_func(preds, dataset)``:
preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
dataset : ``dataset`` dataset : Dataset
The training set from which the labels will be extracted using The training set from which the labels will be extracted using ``dataset.get_label()``.
``dataset.get_label()``.
""" """
def inner(preds, dataset): def inner(preds, dataset):
"""internal function""" """Call passed function with appropriate arguments."""
labels = dataset.get_label() labels = dataset.get_label()
argc = argc_(func) argc = argc_(func)
if argc == 2: if argc == 2:
...@@ -128,18 +133,18 @@ def _eval_function_wrapper(func): ...@@ -128,18 +133,18 @@ def _eval_function_wrapper(func):
class LGBMModel(_LGBMModelBase): class LGBMModel(_LGBMModelBase):
"""Implementation of the scikit-learn API for LightGBM.""" """Implementation of the scikit-learn API for LightGBM."""
def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1, def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=100, learning_rate=0.1, n_estimators=100,
subsample_for_bin=200000, objective=None, class_weight=None, subsample_for_bin=200000, objective=None, class_weight=None,
min_split_gain=0., min_child_weight=1e-3, min_child_samples=20, min_split_gain=0., min_child_weight=1e-3, min_child_samples=20,
subsample=1., subsample_freq=0, colsample_bytree=1., subsample=1., subsample_freq=0, colsample_bytree=1.,
reg_alpha=0., reg_lambda=0., random_state=None, reg_alpha=0., reg_lambda=0., random_state=None,
n_jobs=-1, silent=True, importance_type='split', **kwargs): n_jobs=-1, silent=True, importance_type='split', **kwargs):
"""Construct a gradient boosting model. r"""Construct a gradient boosting model.
Parameters Parameters
---------- ----------
boosting_type : string, optional (default="gbdt") boosting_type : string, optional (default='gbdt')
'gbdt', traditional Gradient Boosting Decision Tree. 'gbdt', traditional Gradient Boosting Decision Tree.
'dart', Dropouts meet Multiple Additive Regression Trees. 'dart', Dropouts meet Multiple Additive Regression Trees.
'goss', Gradient-based One-Side Sampling. 'goss', Gradient-based One-Side Sampling.
...@@ -168,14 +173,14 @@ class LGBMModel(_LGBMModelBase): ...@@ -168,14 +173,14 @@ class LGBMModel(_LGBMModelBase):
The 'balanced' mode uses the values of y to automatically adjust weights The 'balanced' mode uses the values of y to automatically adjust weights
inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``.
If None, all classes are supposed to have weight one. If None, all classes are supposed to have weight one.
Note that these weights will be multiplied with ``sample_weight`` (passed through the fit method) Note, that these weights will be multiplied with ``sample_weight`` (passed through the ``fit`` method)
if ``sample_weight`` is specified. if ``sample_weight`` is specified.
min_split_gain : float, optional (default=0.) min_split_gain : float, optional (default=0.)
Minimum loss reduction required to make a further partition on a leaf node of the tree. Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : float, optional (default=1e-3) min_child_weight : float, optional (default=1e-3)
Minimum sum of instance weight(hessian) needed in a child(leaf). Minimum sum of instance weight (hessian) needed in a child (leaf).
min_child_samples : int, optional (default=20) min_child_samples : int, optional (default=20)
Minimum number of data need in a child(leaf). Minimum number of data needed in a child (leaf).
subsample : float, optional (default=1.) subsample : float, optional (default=1.)
Subsample ratio of the training instance. Subsample ratio of the training instance.
subsample_freq : int, optional (default=0) subsample_freq : int, optional (default=0)
...@@ -195,14 +200,15 @@ class LGBMModel(_LGBMModelBase): ...@@ -195,14 +200,15 @@ class LGBMModel(_LGBMModelBase):
Whether to print messages while running boosting. Whether to print messages while running boosting.
importance_type : string, optional (default='split') importance_type : string, optional (default='split')
The type of feature importance to be filled into ``feature_importances_``. The type of feature importance to be filled into ``feature_importances_``.
If "split", result contains numbers of times the feature is used in a model. If 'split', result contains numbers of times the feature is used in a model.
If "gain", result contains total gains of splits which use the feature. If 'gain', result contains total gains of splits which use the feature.
**kwargs : other parameters **kwargs
Other parameters for the model.
Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters. Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters.
Note Note
---- ----
\\*\\*kwargs is not supported in sklearn, it may cause unexpected issues. \*\*kwargs is not supported in sklearn, it may cause unexpected issues.
Attributes Attributes
---------- ----------
...@@ -227,8 +233,8 @@ class LGBMModel(_LGBMModelBase): ...@@ -227,8 +233,8 @@ class LGBMModel(_LGBMModelBase):
Note Note
---- ----
A custom objective function can be provided for the ``objective`` A custom objective function can be provided for the ``objective`` parameter.
parameter. In this case, it should have the signature In this case, it should have the signature
``objective(y_true, y_pred) -> grad, hess`` or ``objective(y_true, y_pred) -> grad, hess`` or
``objective(y_true, y_pred, group) -> grad, hess``: ``objective(y_true, y_pred, group) -> grad, hess``:
...@@ -282,12 +288,37 @@ class LGBMModel(_LGBMModelBase): ...@@ -282,12 +288,37 @@ class LGBMModel(_LGBMModelBase):
self.set_params(**kwargs) self.set_params(**kwargs)
def get_params(self, deep=True):
    """Get parameters for this estimator.

    Parameters
    ----------
    deep : bool, optional (default=True)
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.

    Returns
    -------
    params : dict
        Parameter names mapped to their values.
    """
    # Start from the scikit-learn parameters, then overlay the extra
    # keyword arguments stored by set_params in _other_params.
    merged = super(LGBMModel, self).get_params(deep=deep)
    merged.update(self._other_params)
    return merged
# minor change to support `**kwargs` # minor change to support `**kwargs`
def set_params(self, **params): def set_params(self, **params):
"""Set the parameters of this estimator.
Parameters
----------
**params
Parameter names with their new values.
Returns
-------
self : object
Returns self.
"""
for key, value in params.items(): for key, value in params.items():
setattr(self, key, value) setattr(self, key, value)
if hasattr(self, '_' + key): if hasattr(self, '_' + key):
...@@ -340,10 +371,10 @@ class LGBMModel(_LGBMModelBase): ...@@ -340,10 +371,10 @@ class LGBMModel(_LGBMModelBase):
If there's more than one, will check all of them. But the training data is ignored anyway. If there's more than one, will check all of them. But the training data is ignored anyway.
verbose : bool, optional (default=True) verbose : bool, optional (default=True)
If True and an evaluation set is used, writes the evaluation progress. If True and an evaluation set is used, writes the evaluation progress.
feature_name : list of strings or 'auto', optional (default="auto") feature_name : list of strings or 'auto', optional (default='auto')
Feature names. Feature names.
If 'auto' and data is pandas DataFrame, data columns names are used. If 'auto' and data is pandas DataFrame, data columns names are used.
categorical_feature : list of strings or int, or 'auto', optional (default="auto") categorical_feature : list of strings or int, or 'auto', optional (default='auto')
Categorical features. Categorical features.
If list of int, interpreted as indices. If list of int, interpreted as indices.
If list of strings, interpreted as feature names (need to specify ``feature_name`` as well). If list of strings, interpreted as feature names (need to specify ``feature_name`` as well).
...@@ -362,15 +393,15 @@ class LGBMModel(_LGBMModelBase): ...@@ -362,15 +393,15 @@ class LGBMModel(_LGBMModelBase):
Note Note
---- ----
Custom eval function expects a callable with following functions: Custom eval function expects a callable with following signatures:
``func(y_true, y_pred)``, ``func(y_true, y_pred, weight)`` or ``func(y_true, y_pred)``, ``func(y_true, y_pred, weight)`` or
``func(y_true, y_pred, weight, group)``. ``func(y_true, y_pred, weight, group)``
Returns (eval_name, eval_result, is_bigger_better) or and returns (eval_name, eval_result, is_bigger_better) or
list of (eval_name, eval_result, is_bigger_better) list of (eval_name, eval_result, is_bigger_better):
y_true : array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
weight : array-like of shape = [n_samples] weight : array-like of shape = [n_samples]
The weight of samples. The weight of samples.
...@@ -539,7 +570,8 @@ class LGBMModel(_LGBMModelBase): ...@@ -539,7 +570,8 @@ class LGBMModel(_LGBMModelBase):
like SHAP interaction values, like SHAP interaction values,
you can install shap package (https://github.com/slundberg/shap). you can install shap package (https://github.com/slundberg/shap).
**kwargs : other parameters for the prediction **kwargs
Other parameters for the prediction.
Returns Returns
------- -------
...@@ -629,7 +661,7 @@ class LGBMRegressor(LGBMModel, _LGBMRegressorBase): ...@@ -629,7 +661,7 @@ class LGBMRegressor(LGBMModel, _LGBMRegressorBase):
eval_set=None, eval_names=None, eval_sample_weight=None, eval_set=None, eval_names=None, eval_sample_weight=None,
eval_init_score=None, eval_metric=None, early_stopping_rounds=None, eval_init_score=None, eval_metric=None, early_stopping_rounds=None,
verbose=True, feature_name='auto', categorical_feature='auto', callbacks=None): verbose=True, feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
super(LGBMRegressor, self).fit(X, y, sample_weight=sample_weight, super(LGBMRegressor, self).fit(X, y, sample_weight=sample_weight,
init_score=init_score, eval_set=eval_set, init_score=init_score, eval_set=eval_set,
eval_names=eval_names, eval_names=eval_names,
...@@ -656,6 +688,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -656,6 +688,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
eval_class_weight=None, eval_init_score=None, eval_metric=None, eval_class_weight=None, eval_init_score=None, eval_metric=None,
early_stopping_rounds=None, verbose=True, early_stopping_rounds=None, verbose=True,
feature_name='auto', categorical_feature='auto', callbacks=None): feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
_LGBMAssertAllFinite(y) _LGBMAssertAllFinite(y)
_LGBMCheckClassificationTargets(y) _LGBMCheckClassificationTargets(y)
self._le = _LGBMLabelEncoder().fit(y) self._le = _LGBMLabelEncoder().fit(y)
...@@ -704,6 +737,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -704,6 +737,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
def predict(self, X, raw_score=False, num_iteration=None, def predict(self, X, raw_score=False, num_iteration=None,
pred_leaf=False, pred_contrib=False, **kwargs): pred_leaf=False, pred_contrib=False, **kwargs):
"""Docstring is inherited from the LGBMModel."""
result = self.predict_proba(X, raw_score, num_iteration, result = self.predict_proba(X, raw_score, num_iteration,
pred_leaf, pred_contrib, **kwargs) pred_leaf, pred_contrib, **kwargs)
if raw_score or pred_leaf or pred_contrib: if raw_score or pred_leaf or pred_contrib:
...@@ -739,7 +773,8 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -739,7 +773,8 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
like SHAP interaction values, like SHAP interaction values,
you can install shap package (https://github.com/slundberg/shap). you can install shap package (https://github.com/slundberg/shap).
**kwargs : other parameters for the prediction **kwargs
Other parameters for the prediction.
Returns Returns
------- -------
...@@ -781,6 +816,7 @@ class LGBMRanker(LGBMModel): ...@@ -781,6 +816,7 @@ class LGBMRanker(LGBMModel):
eval_init_score=None, eval_group=None, eval_metric=None, eval_init_score=None, eval_group=None, eval_metric=None,
eval_at=[1], early_stopping_rounds=None, verbose=True, eval_at=[1], early_stopping_rounds=None, verbose=True,
feature_name='auto', categorical_feature='auto', callbacks=None): feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
# check group data # check group data
if group is None: if group is None:
raise ValueError("Should set group for ranking task") raise ValueError("Should set group for ranking task")
......
...@@ -16,7 +16,8 @@ def find_lib_path(): ...@@ -16,7 +16,8 @@ def find_lib_path():
return [] return []
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
dll_path = [curr_path, os.path.join(curr_path, '../../'), dll_path = [curr_path,
os.path.join(curr_path, '../../'),
os.path.join(curr_path, '../../python-package/lightgbm/compile'), os.path.join(curr_path, '../../python-package/lightgbm/compile'),
os.path.join(curr_path, '../../python-package/compile'), os.path.join(curr_path, '../../python-package/compile'),
os.path.join(curr_path, '../../lib/')] os.path.join(curr_path, '../../lib/')]
...@@ -31,7 +32,7 @@ def find_lib_path(): ...@@ -31,7 +32,7 @@ def find_lib_path():
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
if not lib_path: if not lib_path:
dll_path = [os.path.realpath(p) for p in dll_path] dll_path = [os.path.realpath(p) for p in dll_path]
raise Exception('Cannot find lightgbm library in following paths: ' + '\n'.join(dll_path)) raise Exception('Cannot find lightgbm library file in following paths:\n' + '\n'.join(dll_path))
return lib_path return lib_path
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment