"src/vscode:/vscode.git/clone" did not exist on "0551f77517d971ce9bc32e2633768245957b410a"
Commit ccf2570c authored by Nikita Titov's avatar Nikita Titov Committed by Qiwei Ye
Browse files

[docs][ci][python] added docstring style test and fixed errors in existing docstrings (#1759)

* added docstring style test and fixed errors in existing docstrings

* hotfix

* hotfix

* fix grammar

* hotfix
parent dfdf8861
...@@ -45,8 +45,9 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "check-docs" ]]; then ...@@ -45,8 +45,9 @@ if [[ $TRAVIS == "true" ]] && [[ $TASK == "check-docs" ]]; then
fi fi
if [[ $TASK == "pylint" ]]; then if [[ $TASK == "pylint" ]]; then
conda install -y -n $CONDA_ENV pycodestyle conda install -y -n $CONDA_ENV pycodestyle pydocstyle
pycodestyle --ignore=E501,W503 --exclude=./compute,./.nuget . || exit -1 pycodestyle --ignore=E501,W503 --exclude=./compute,./.nuget . || exit -1
pydocstyle --convention=numpy --add-ignore=D105 --match-dir="^(?!^compute|test|example).*" --match="(?!^test_|setup).*\.py" . || exit -1
exit 0 exit 0
fi fi
......
# coding: utf-8
"""Script for generating files with NuGet package metadata."""
import os import os
import sys import sys
......
...@@ -16,7 +16,7 @@ ...@@ -16,7 +16,7 @@
# If extensions (or modules to document with autodoc) are in another directory, # If extensions (or modules to document with autodoc) are in another directory,
# add these directories to sys.path here. If the directory is relative to the # add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute. # documentation root, use os.path.abspath to make it absolute.
"""Sphinx configuration file."""
import datetime import datetime
import os import os
import sys import sys
...@@ -128,4 +128,11 @@ htmlhelp_basename = 'LightGBMdoc' ...@@ -128,4 +128,11 @@ htmlhelp_basename = 'LightGBMdoc'
def setup(app): def setup(app):
"""Add new elements at Sphinx initialization time.
Parameters
----------
app : object
The application object representing the Sphinx process.
"""
app.add_javascript("js/script.js") app.add_javascript("js/script.js")
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, C0111 # pylint: disable = invalid-name, C0111
''' """Comparison of `binary` and `xentropy` objectives.
BLUF: The `xentropy` objective does logistic regression and generalizes BLUF: The `xentropy` objective does logistic regression and generalizes
to the case where labels are probabilistic (i.e. numbers between 0 and 1). to the case where labels are probabilistic (i.e. numbers between 0 and 1).
...@@ -9,7 +10,7 @@ Details: Both `binary` and `xentropy` minimize the log loss and use ...@@ -9,7 +10,7 @@ Details: Both `binary` and `xentropy` minimize the log loss and use
between them with default settings is that `binary` may achieve a slight between them with default settings is that `binary` may achieve a slight
speed improvement by assuming that the labels are binary instead of speed improvement by assuming that the labels are binary instead of
probabilistic. probabilistic.
''' """
import time import time
...@@ -46,19 +47,28 @@ DATA = { ...@@ -46,19 +47,28 @@ DATA = {
################# #################
# Set up a couple of utilities for our experiments # Set up a couple of utilities for our experiments
def log_loss(preds, labels):
    """Logarithmic loss with non-necessarily-binary labels.

    Parameters
    ----------
    preds : numpy.ndarray
        Predicted probabilities.
    labels : numpy.ndarray
        True labels; may be probabilistic (values in [0, 1]),
        not only hard 0/1 labels.

    Returns
    -------
    loss : float
        The negative mean log-likelihood.
    """
    log_likelihood = np.sum(labels * np.log(preds)) / len(preds)
    return -log_likelihood
def experiment(objective, label_type, data): def experiment(objective, label_type, data):
''' """Measure performance of an objective.
Measure performance of an objective
:param objective: (str) 'binary' or 'xentropy' Parameters
:param label_type: (str) 'binary' or 'probability' ----------
:param data: DATA objective : string 'binary' or 'xentropy'
:return: dict with experiment summary stats Objective function.
''' label_type : string 'binary' or 'probability'
Type of the label.
data : dict
Data for training.
Returns
-------
result : dict
Experiment summary stats.
"""
np.random.seed(0) np.random.seed(0)
nrounds = 5 nrounds = 5
lgb_data = data['lgb_with_' + label_type + '_labels'] lgb_data = data['lgb_with_' + label_type + '_labels']
......
# coding: utf-8 # coding: utf-8
"""This script generates LightGBM/src/io/config_auto.cpp file """Helper script for generating config file and parameters list.
This script generates LightGBM/src/io/config_auto.cpp file
with list of all parameters, aliases table and other routines with list of all parameters, aliases table and other routines
along with parameters description in LightGBM/docs/Parameters.rst file along with parameters description in LightGBM/docs/Parameters.rst file
from the information in LightGBM/include/LightGBM/config.h file. from the information in LightGBM/include/LightGBM/config.h file.
...@@ -7,7 +9,19 @@ from the information in LightGBM/include/LightGBM/config.h file. ...@@ -7,7 +9,19 @@ from the information in LightGBM/include/LightGBM/config.h file.
import os import os
def GetParameterInfos(config_hpp): def get_parameter_infos(config_hpp):
"""Parse config header file.
Parameters
----------
config_hpp : string
Path to the config header file.
Returns
-------
infos : tuple
Tuple with names and content of sections.
"""
is_inparameter = False is_inparameter = False
parameter_group = None parameter_group = None
cur_key = None cur_key = None
...@@ -63,7 +77,19 @@ def GetParameterInfos(config_hpp): ...@@ -63,7 +77,19 @@ def GetParameterInfos(config_hpp):
return keys, member_infos return keys, member_infos
def GetNames(infos): def get_names(infos):
"""Get names of all parameters.
Parameters
----------
infos : list
Content of the config header file.
Returns
-------
names : list
Names of all parameters.
"""
names = [] names = []
for x in infos: for x in infos:
for y in x: for y in x:
...@@ -71,7 +97,19 @@ def GetNames(infos): ...@@ -71,7 +97,19 @@ def GetNames(infos):
return names return names
def GetAlias(infos): def get_alias(infos):
"""Get aliases of all parameters.
Parameters
----------
infos : list
Content of the config header file.
Returns
-------
pairs : list
List of tuples (param alias, param name).
"""
pairs = [] pairs = []
for x in infos: for x in infos:
for y in x: for y in x:
...@@ -83,7 +121,23 @@ def GetAlias(infos): ...@@ -83,7 +121,23 @@ def GetAlias(infos):
return pairs return pairs
def SetOneVarFromString(name, param_type, checks): def set_one_var_from_string(name, param_type, checks):
"""Construct code for auto config file for one param value.
Parameters
----------
name : string
Name of the parameter.
param_type : string
Type of the parameter.
checks : list
Constraints of the parameter.
Returns
-------
ret : string
Lines of auto config file with getting and checks of one parameter value.
"""
ret = "" ret = ""
univar_mapper = {"int": "GetInt", "double": "GetDouble", "bool": "GetBool", "std::string": "GetString"} univar_mapper = {"int": "GetInt", "double": "GetDouble", "bool": "GetBool", "std::string": "GetString"}
if "vector" not in param_type: if "vector" not in param_type:
...@@ -103,9 +157,33 @@ def SetOneVarFromString(name, param_type, checks): ...@@ -103,9 +157,33 @@ def SetOneVarFromString(name, param_type, checks):
return ret return ret
def GenParameterDescription(sections, descriptions, params_rst): def gen_parameter_description(sections, descriptions, params_rst):
"""Write descriptions of parameters to the documentation file.
Parameters
----------
sections : list
Names of parameters sections.
descriptions : list
Structured descriptions of parameters.
params_rst : string
Path to the file with parameters documentation.
"""
def parse_check(check, reverse=False): def parse_check(check, reverse=False):
"""Parse the constraint.
Parameters
----------
check : string
String representation of the constraint.
reverse : bool, optional (default=False)
Whether to reverse the sign of the constraint.
Returns
-------
pair : tuple
Parsed constraint in the form of tuple (value, sign).
"""
try: try:
idx = 1 idx = 1
float(check[idx:]) float(check[idx:])
...@@ -164,10 +242,24 @@ def GenParameterDescription(sections, descriptions, params_rst): ...@@ -164,10 +242,24 @@ def GenParameterDescription(sections, descriptions, params_rst):
new_params_file.write(after) new_params_file.write(after)
def GenParameterCode(config_hpp, config_out_cpp): def gen_parameter_code(config_hpp, config_out_cpp):
keys, infos = GetParameterInfos(config_hpp) """Generate auto config file.
names = GetNames(infos)
alias = GetAlias(infos) Parameters
----------
config_hpp : string
Path to the config header file.
config_out_cpp : string
Path to the auto config file.
Returns
-------
infos : tuple
Tuple with names and content of sections.
"""
keys, infos = get_parameter_infos(config_hpp)
names = get_names(infos)
alias = get_alias(infos)
str_to_write = "/// This file is auto generated by LightGBM\\helper\\parameter_generator.py from LightGBM\\include\\LightGBM\\config.h file.\n" str_to_write = "/// This file is auto generated by LightGBM\\helper\\parameter_generator.py from LightGBM\\include\\LightGBM\\config.h file.\n"
str_to_write += "#include<LightGBM/config.h>\nnamespace LightGBM {\n" str_to_write += "#include<LightGBM/config.h>\nnamespace LightGBM {\n"
# alias table # alias table
...@@ -192,7 +284,7 @@ def GenParameterCode(config_hpp, config_out_cpp): ...@@ -192,7 +284,7 @@ def GenParameterCode(config_hpp, config_out_cpp):
checks = [] checks = []
if "check" in y: if "check" in y:
checks = y["check"] checks = y["check"]
tmp = SetOneVarFromString(name, param_type, checks) tmp = set_one_var_from_string(name, param_type, checks)
str_to_write += tmp str_to_write += tmp
# tails # tails
str_to_write += "}\n\n" str_to_write += "}\n\n"
...@@ -226,5 +318,5 @@ if __name__ == "__main__": ...@@ -226,5 +318,5 @@ if __name__ == "__main__":
config_hpp = os.path.join(current_dir, os.path.pardir, 'include', 'LightGBM', 'config.h') config_hpp = os.path.join(current_dir, os.path.pardir, 'include', 'LightGBM', 'config.h')
config_out_cpp = os.path.join(current_dir, os.path.pardir, 'src', 'io', 'config_auto.cpp') config_out_cpp = os.path.join(current_dir, os.path.pardir, 'src', 'io', 'config_auto.cpp')
params_rst = os.path.join(current_dir, os.path.pardir, 'docs', 'Parameters.rst') params_rst = os.path.join(current_dir, os.path.pardir, 'docs', 'Parameters.rst')
sections, descriptions = GenParameterCode(config_hpp, config_out_cpp) sections, descriptions = gen_parameter_code(config_hpp, config_out_cpp)
GenParameterDescription(sections, descriptions, params_rst) gen_parameter_description(sections, descriptions, params_rst)
...@@ -151,8 +151,8 @@ Examples ...@@ -151,8 +151,8 @@ Examples
Refer to the walk through examples in `Python guide folder <https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide>`_. Refer to the walk through examples in `Python guide folder <https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide>`_.
Developments Development Guide
------------ -----------------
The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps/pep-0008/>`_. If you would like to make a contribution and are not familiar with PEP 8, please check the PEP 8 style guide first. Otherwise, the check won't pass. You should be careful about:
...@@ -166,6 +166,8 @@ The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps ...@@ -166,6 +166,8 @@ The code style of Python-package follows `PEP 8 <https://www.python.org/dev/peps
E501 (line too long) and W503 (line break occurred before a binary operator) can be ignored. E501 (line too long) and W503 (line break occurred before a binary operator) can be ignored.
Documentation strings (docstrings) are written in the NumPy style.
.. |License| image:: https://img.shields.io/badge/license-MIT-blue.svg .. |License| image:: https://img.shields.io/badge/license-MIT-blue.svg
:target: https://github.com/Microsoft/LightGBM/blob/master/LICENSE :target: https://github.com/Microsoft/LightGBM/blob/master/LICENSE
.. |Python Versions| image:: https://img.shields.io/pypi/pyversions/lightgbm.svg .. |Python Versions| image:: https://img.shields.io/pypi/pyversions/lightgbm.svg
......
# coding: utf-8 # coding: utf-8
"""LightGBM, Light Gradient Boosting Machine. """LightGBM, Light Gradient Boosting Machine.
Contributors: https://github.com/Microsoft/LightGBM/graphs/contributors Contributors: https://github.com/Microsoft/LightGBM/graphs/contributors.
""" """
from __future__ import absolute_import from __future__ import absolute_import
......
This diff is collapsed.
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, W0105, C0301 # pylint: disable = invalid-name, W0105, C0301
"""Callbacks library."""
from __future__ import absolute_import from __future__ import absolute_import
import collections import collections
...@@ -9,14 +10,18 @@ from .compat import range_ ...@@ -9,14 +10,18 @@ from .compat import range_
class EarlyStopException(Exception):
    """Exception of early stopping."""

    def __init__(self, best_iteration, best_score):
        """Create early stopping exception.

        Parameters
        ----------
        best_iteration : int
            The best iteration stopped.
        best_score : float
            The score of the best iteration.
        """
        super(EarlyStopException, self).__init__()
        self.best_iteration = best_iteration
        self.best_score = best_score
...@@ -34,7 +39,7 @@ CallbackEnv = collections.namedtuple( ...@@ -34,7 +39,7 @@ CallbackEnv = collections.namedtuple(
def _format_eval_result(value, show_stdv=True): def _format_eval_result(value, show_stdv=True):
"""format metric string""" """Format metric string."""
if len(value) == 4: if len(value) == 4:
return '%s\'s %s: %g' % (value[0], value[1], value[2]) return '%s\'s %s: %g' % (value[0], value[1], value[2])
elif len(value) == 5: elif len(value) == 5:
...@@ -61,13 +66,12 @@ def print_evaluation(period=1, show_stdv=True): ...@@ -61,13 +66,12 @@ def print_evaluation(period=1, show_stdv=True):
callback : function callback : function
The callback that prints the evaluation results every ``period`` iteration(s). The callback that prints the evaluation results every ``period`` iteration(s).
""" """
def callback(env): def _callback(env):
"""internal function"""
if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0: if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0:
result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list]) result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list])
print('[%d]\t%s' % (env.iteration + 1, result)) print('[%d]\t%s' % (env.iteration + 1, result))
callback.order = 10 _callback.order = 10
return callback return _callback
def record_evaluation(eval_result): def record_evaluation(eval_result):
...@@ -87,19 +91,17 @@ def record_evaluation(eval_result): ...@@ -87,19 +91,17 @@ def record_evaluation(eval_result):
raise TypeError('Eval_result should be a dictionary') raise TypeError('Eval_result should be a dictionary')
eval_result.clear() eval_result.clear()
def init(env): def _init(env):
"""internal function"""
for data_name, _, _, _ in env.evaluation_result_list: for data_name, _, _, _ in env.evaluation_result_list:
eval_result.setdefault(data_name, collections.defaultdict(list)) eval_result.setdefault(data_name, collections.defaultdict(list))
def callback(env): def _callback(env):
"""internal function"""
if not eval_result: if not eval_result:
init(env) _init(env)
for data_name, eval_name, result, _ in env.evaluation_result_list: for data_name, eval_name, result, _ in env.evaluation_result_list:
eval_result[data_name][eval_name].append(result) eval_result[data_name][eval_name].append(result)
callback.order = 20 _callback.order = 20
return callback return _callback
def reset_parameter(**kwargs): def reset_parameter(**kwargs):
...@@ -111,7 +113,7 @@ def reset_parameter(**kwargs): ...@@ -111,7 +113,7 @@ def reset_parameter(**kwargs):
Parameters Parameters
---------- ----------
**kwargs: value should be list or function **kwargs : value should be list or function
List of parameters for each boosting round List of parameters for each boosting round
or a customized function that calculates the parameter in terms of or a customized function that calculates the parameter in terms of
current number of round (e.g. yields learning rate decay). current number of round (e.g. yields learning rate decay).
...@@ -123,8 +125,7 @@ def reset_parameter(**kwargs): ...@@ -123,8 +125,7 @@ def reset_parameter(**kwargs):
callback : function callback : function
The callback that resets the parameter after the first iteration. The callback that resets the parameter after the first iteration.
""" """
def callback(env): def _callback(env):
"""internal function"""
new_parameters = {} new_parameters = {}
for key, value in kwargs.items(): for key, value in kwargs.items():
if key in ['num_class', 'num_classes', if key in ['num_class', 'num_classes',
...@@ -143,9 +144,9 @@ def reset_parameter(**kwargs): ...@@ -143,9 +144,9 @@ def reset_parameter(**kwargs):
if new_parameters: if new_parameters:
env.model.reset_parameter(new_parameters) env.model.reset_parameter(new_parameters)
env.params.update(new_parameters) env.params.update(new_parameters)
callback.before_iteration = True _callback.before_iteration = True
callback.order = 10 _callback.order = 10
return callback return _callback
def early_stopping(stopping_rounds, verbose=True): def early_stopping(stopping_rounds, verbose=True):
...@@ -164,7 +165,6 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -164,7 +165,6 @@ def early_stopping(stopping_rounds, verbose=True):
---------- ----------
stopping_rounds : int stopping_rounds : int
The possible number of rounds without the trend occurrence. The possible number of rounds without the trend occurrence.
verbose : bool, optional (default=True) verbose : bool, optional (default=True)
Whether to print message with early stopping information. Whether to print message with early stopping information.
...@@ -178,8 +178,7 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -178,8 +178,7 @@ def early_stopping(stopping_rounds, verbose=True):
best_score_list = [] best_score_list = []
cmp_op = [] cmp_op = []
def init(env): def _init(env):
"""internal function"""
if not env.evaluation_result_list: if not env.evaluation_result_list:
raise ValueError('For early stopping, ' raise ValueError('For early stopping, '
'at least one dataset and eval metric is required for evaluation') 'at least one dataset and eval metric is required for evaluation')
...@@ -198,10 +197,9 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -198,10 +197,9 @@ def early_stopping(stopping_rounds, verbose=True):
best_score.append(float('inf')) best_score.append(float('inf'))
cmp_op.append(lt) cmp_op.append(lt)
def callback(env): def _callback(env):
"""internal function"""
if not cmp_op: if not cmp_op:
init(env) _init(env)
for i in range_(len(env.evaluation_result_list)): for i in range_(len(env.evaluation_result_list)):
score = env.evaluation_result_list[i][2] score = env.evaluation_result_list[i][2]
if cmp_op[i](score, best_score[i]): if cmp_op[i](score, best_score[i]):
...@@ -218,5 +216,5 @@ def early_stopping(stopping_rounds, verbose=True): ...@@ -218,5 +216,5 @@ def early_stopping(stopping_rounds, verbose=True):
print('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % ( print('Did not meet early stopping. Best iteration is:\n[%d]\t%s' % (
best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]]))) best_iter[i] + 1, '\t'.join([_format_eval_result(x) for x in best_score_list[i]])))
raise EarlyStopException(best_iter[i], best_score_list[i]) raise EarlyStopException(best_iter[i], best_score_list[i])
callback.order = 30 _callback.order = 30
return callback return _callback
# coding: utf-8 # coding: utf-8
# pylint: disable = C0103 # pylint: disable = C0103
"""Compatibility""" """Compatibility library."""
from __future__ import absolute_import from __future__ import absolute_import
import inspect import inspect
...@@ -10,7 +10,7 @@ import numpy as np ...@@ -10,7 +10,7 @@ import numpy as np
is_py3 = (sys.version_info[0] == 3) is_py3 = (sys.version_info[0] == 3)
"""compatibility between python2 and python3""" """Compatibility between Python2 and Python3"""
if is_py3: if is_py3:
zip_ = zip zip_ = zip
string_type = str string_type = str
...@@ -19,10 +19,11 @@ if is_py3: ...@@ -19,10 +19,11 @@ if is_py3:
range_ = range range_ = range
def argc_(func): def argc_(func):
"""return number of arguments of a function""" """Count the number of arguments of a function."""
return len(inspect.signature(func).parameters) return len(inspect.signature(func).parameters)
def decode_string(bytestring): def decode_string(bytestring):
"""Decode C bytestring to ordinary string."""
return bytestring.decode('utf-8') return bytestring.decode('utf-8')
else: else:
from itertools import izip as zip_ from itertools import izip as zip_
...@@ -32,10 +33,11 @@ else: ...@@ -32,10 +33,11 @@ else:
range_ = xrange range_ = xrange
def argc_(func): def argc_(func):
"""return number of arguments of a function""" """Count the number of arguments of a function."""
return len(inspect.getargspec(func).args) return len(inspect.getargspec(func).args)
def decode_string(bytestring): def decode_string(bytestring):
"""Decode C bytestring to ordinary string."""
return bytestring return bytestring
"""json""" """json"""
...@@ -48,6 +50,7 @@ except (ImportError, SyntaxError): ...@@ -48,6 +50,7 @@ except (ImportError, SyntaxError):
def json_default_with_numpy(obj): def json_default_with_numpy(obj):
"""Convert numpy classes to JSON serializable objects."""
if isinstance(obj, (np.integer, np.floating, np.bool_)): if isinstance(obj, (np.integer, np.floating, np.bool_)):
return obj.item() return obj.item()
elif isinstance(obj, np.ndarray): elif isinstance(obj, np.ndarray):
...@@ -64,9 +67,13 @@ except ImportError: ...@@ -64,9 +67,13 @@ except ImportError:
PANDAS_INSTALLED = False PANDAS_INSTALLED = False
class Series(object): class Series(object):
"""Dummy class for pandas.Series."""
pass pass
class DataFrame(object): class DataFrame(object):
"""Dummy class for pandas.DataFrame."""
pass pass
"""matplotlib""" """matplotlib"""
...@@ -131,4 +138,6 @@ except ImportError: ...@@ -131,4 +138,6 @@ except ImportError:
# DeprecationWarning is not shown by default, so let's create our own with higher level
class LGBMDeprecationWarning(UserWarning):
    """Custom deprecation warning."""

    pass
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, W0105 # pylint: disable = invalid-name, W0105
"""Training Library containing training routines of LightGBM.""" """Library with training routines of LightGBM."""
from __future__ import absolute_import from __future__ import absolute_import
import collections import collections
...@@ -30,21 +30,21 @@ def train(params, train_set, num_boost_round=100, ...@@ -30,21 +30,21 @@ def train(params, train_set, num_boost_round=100,
params : dict params : dict
Parameters for training. Parameters for training.
train_set : Dataset train_set : Dataset
Data to be trained. Data to be trained on.
num_boost_round: int, optional (default=100) num_boost_round : int, optional (default=100)
Number of boosting iterations. Number of boosting iterations.
valid_sets: list of Datasets or None, optional (default=None) valid_sets : list of Datasets or None, optional (default=None)
List of data to be evaluated during training. List of data to be evaluated on during training.
valid_names: list of string or None, optional (default=None) valid_names : list of strings or None, optional (default=None)
Names of ``valid_sets``. Names of ``valid_sets``.
fobj : callable or None, optional (default=None) fobj : callable or None, optional (default=None)
Customized objective function. Customized objective function.
feval : callable or None, optional (default=None) feval : callable or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Should accept two parameters: preds, train_data. Should accept two parameters: preds, train_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
For multi-class task, the preds is group by class_id first, then group by row_id. For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
Note: should return (eval_name, eval_result, is_higher_better) or list of such tuples.
To ignore the default metric corresponding to the used objective, To ignore the default metric corresponding to the used objective,
set the ``metric`` parameter to the string ``"None"`` in ``params``. set the ``metric`` parameter to the string ``"None"`` in ``params``.
init_model : string, Booster or None, optional (default=None) init_model : string, Booster or None, optional (default=None)
...@@ -60,23 +60,24 @@ def train(params, train_set, num_boost_round=100, ...@@ -60,23 +60,24 @@ def train(params, train_set, num_boost_round=100,
All values in categorical features should be less than int32 max value (2147483647). All values in categorical features should be less than int32 max value (2147483647).
Large values could be memory consuming. Consider using consecutive integers starting from zero. Large values could be memory consuming. Consider using consecutive integers starting from zero.
All negative values in categorical features will be treated as missing values. All negative values in categorical features will be treated as missing values.
early_stopping_rounds: int or None, optional (default=None) early_stopping_rounds : int or None, optional (default=None)
Activates early stopping. The model will train until the validation score stops improving. Activates early stopping. The model will train until the validation score stops improving.
Validation score needs to improve at least every ``early_stopping_rounds`` round(s) Validation score needs to improve at least every ``early_stopping_rounds`` round(s)
to continue training. to continue training.
Requires at least one validation data and one metric. Requires at least one validation data and one metric.
If there's more than one, will check all of them. But the training data is ignored anyway. If there's more than one, will check all of them. But the training data is ignored anyway.
If early stopping occurs, the model will add ``best_iteration`` field. If early stopping occurs, the model will add ``best_iteration`` field.
evals_result: dict or None, optional (default=None) evals_result : dict or None, optional (default=None)
This dictionary used to store all evaluation results of all the items in ``valid_sets``. This dictionary used to store all evaluation results of all the items in ``valid_sets``.
Example Example
------- -------
With a ``valid_sets`` = [valid_set, train_set], With a ``valid_sets`` = [valid_set, train_set],
``valid_names`` = ['eval', 'train'] ``valid_names`` = ['eval', 'train']
and a ``params`` = ('metric':'logloss') and a ``params`` = {'metric': 'logloss'}
returns: {'train': {'logloss': ['0.48253', '0.35953', ...]}, returns {'train': {'logloss': ['0.48253', '0.35953', ...]},
'eval': {'logloss': ['0.480385', '0.357756', ...]}}. 'eval': {'logloss': ['0.480385', '0.357756', ...]}}.
verbose_eval : bool or int, optional (default=True) verbose_eval : bool or int, optional (default=True)
Requires at least one validation data. Requires at least one validation data.
If True, the eval metric on the valid set is printed at each boosting stage. If True, the eval metric on the valid set is printed at each boosting stage.
...@@ -85,9 +86,10 @@ def train(params, train_set, num_boost_round=100, ...@@ -85,9 +86,10 @@ def train(params, train_set, num_boost_round=100,
Example Example
------- -------
With ``verbose_eval`` = 4 and at least one item in evals, With ``verbose_eval`` = 4 and at least one item in ``valid_sets``,
an evaluation metric is printed every 4 (instead of 1) boosting stages. an evaluation metric is printed every 4 (instead of 1) boosting stages.
learning_rates: list, callable or None, optional (default=None)
learning_rates : list, callable or None, optional (default=None)
List of learning rates for each boosting round List of learning rates for each boosting round
or a customized function that calculates ``learning_rate`` or a customized function that calculates ``learning_rate``
in terms of current number of round (e.g. yields learning rate decay). in terms of current number of round (e.g. yields learning rate decay).
...@@ -238,31 +240,30 @@ def train(params, train_set, num_boost_round=100, ...@@ -238,31 +240,30 @@ def train(params, train_set, num_boost_round=100,
return booster return booster
class CVBooster(object): class _CVBooster(object):
""""Auxiliary data struct to hold all boosters of CV.""" """Auxiliary data struct to hold all boosters of CV."""
def __init__(self): def __init__(self):
self.boosters = [] self.boosters = []
self.best_iteration = -1 self.best_iteration = -1
def append(self, booster): def append(self, booster):
"""add a booster to CVBooster""" """Add a booster to _CVBooster."""
self.boosters.append(booster) self.boosters.append(booster)
def __getattr__(self, name): def __getattr__(self, name):
"""redirect methods call of CVBooster""" """Redirect methods call of _CVBooster."""
def handlerFunction(*args, **kwargs): def handler_function(*args, **kwargs):
"""call methods with each booster, and concatenate their results""" """Call methods with each booster, and concatenate their results."""
ret = [] ret = []
for booster in self.boosters: for booster in self.boosters:
ret.append(getattr(booster, name)(*args, **kwargs)) ret.append(getattr(booster, name)(*args, **kwargs))
return ret return ret
return handlerFunction return handler_function
def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True, shuffle=True): def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratified=True, shuffle=True):
""" """Make a n-fold list of Booster from random indices."""
Make an n-fold list of Booster from random indices.
"""
full_data = full_data.construct() full_data = full_data.construct()
num_data = full_data.num_data() num_data = full_data.num_data()
if folds is not None: if folds is not None:
...@@ -301,7 +302,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -301,7 +302,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
train_id = [np.concatenate([test_id[i] for i in range_(nfold) if k != i]) for k in range_(nfold)] train_id = [np.concatenate([test_id[i] for i in range_(nfold) if k != i]) for k in range_(nfold)]
folds = zip_(train_id, test_id) folds = zip_(train_id, test_id)
ret = CVBooster() ret = _CVBooster()
for train_idx, test_idx in folds: for train_idx, test_idx in folds:
train_set = full_data.subset(train_idx) train_set = full_data.subset(train_idx)
valid_set = full_data.subset(test_idx) valid_set = full_data.subset(test_idx)
...@@ -317,9 +318,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi ...@@ -317,9 +318,7 @@ def _make_n_folds(full_data, folds, nfold, params, seed, fpreproc=None, stratifi
def _agg_cv_result(raw_results): def _agg_cv_result(raw_results):
""" """Aggregate cross-validation results."""
Aggregate cross-validation results.
"""
cvmap = collections.defaultdict(list) cvmap = collections.defaultdict(list)
metric_type = {} metric_type = {}
for one_result in raw_results: for one_result in raw_results:
...@@ -356,7 +355,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -356,7 +355,7 @@ def cv(params, train_set, num_boost_round=100,
Number of folds in CV. Number of folds in CV.
stratified : bool, optional (default=True) stratified : bool, optional (default=True)
Whether to perform stratified sampling. Whether to perform stratified sampling.
shuffle: bool, optional (default=True) shuffle : bool, optional (default=True)
Whether to shuffle before splitting data. Whether to shuffle before splitting data.
metrics : string, list of strings or None, optional (default=None) metrics : string, list of strings or None, optional (default=None)
Evaluation metrics to be monitored while CV. Evaluation metrics to be monitored while CV.
...@@ -365,10 +364,10 @@ def cv(params, train_set, num_boost_round=100, ...@@ -365,10 +364,10 @@ def cv(params, train_set, num_boost_round=100,
Custom objective function. Custom objective function.
feval : callable or None, optional (default=None) feval : callable or None, optional (default=None)
Customized evaluation function. Customized evaluation function.
Should accept two parameters: preds, train_data. Should accept two parameters: preds, train_data,
and return (eval_name, eval_result, is_higher_better) or list of such tuples.
For multi-class task, the preds is group by class_id first, then group by row_id. For multi-class task, the preds is group by class_id first, then group by row_id.
If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i]. If you want to get i-th row preds in j-th class, the access way is preds[j * num_data + i].
Note: should return (eval_name, eval_result, is_higher_better) or list of such tuples.
To ignore the default metric corresponding to the used objective, To ignore the default metric corresponding to the used objective,
set ``metrics`` to the string ``"None"``. set ``metrics`` to the string ``"None"``.
init_model : string, Booster or None, optional (default=None) init_model : string, Booster or None, optional (default=None)
...@@ -384,12 +383,12 @@ def cv(params, train_set, num_boost_round=100, ...@@ -384,12 +383,12 @@ def cv(params, train_set, num_boost_round=100,
All values in categorical features should be less than int32 max value (2147483647). All values in categorical features should be less than int32 max value (2147483647).
Large values could be memory consuming. Consider using consecutive integers starting from zero. Large values could be memory consuming. Consider using consecutive integers starting from zero.
All negative values in categorical features will be treated as missing values. All negative values in categorical features will be treated as missing values.
early_stopping_rounds: int or None, optional (default=None) early_stopping_rounds : int or None, optional (default=None)
Activates early stopping. Activates early stopping.
CV score needs to improve at least every ``early_stopping_rounds`` round(s) CV score needs to improve at least every ``early_stopping_rounds`` round(s)
to continue. to continue.
Requires at least one metric. If there's more than one, will check all of them. Requires at least one metric. If there's more than one, will check all of them.
Last entry in evaluation history is the one from best iteration. Last entry in evaluation history is the one from the best iteration.
fpreproc : callable or None, optional (default=None) fpreproc : callable or None, optional (default=None)
Preprocessing function that takes (dtrain, dtest, params) Preprocessing function that takes (dtrain, dtest, params)
and returns transformed versions of those. and returns transformed versions of those.
...@@ -400,7 +399,7 @@ def cv(params, train_set, num_boost_round=100, ...@@ -400,7 +399,7 @@ def cv(params, train_set, num_boost_round=100,
If int, progress will be displayed at every given ``verbose_eval`` boosting stage. If int, progress will be displayed at every given ``verbose_eval`` boosting stage.
show_stdv : bool, optional (default=True) show_stdv : bool, optional (default=True)
Whether to display the standard deviation in progress. Whether to display the standard deviation in progress.
Results are not affected by this parameter, and always contains std. Results are not affected by this parameter, and always contain std.
seed : int, optional (default=0) seed : int, optional (default=0)
Seed used to generate the folds (passed to numpy.random.seed). Seed used to generate the folds (passed to numpy.random.seed).
callbacks : list of callables or None, optional (default=None) callbacks : list of callables or None, optional (default=None)
......
# coding: utf-8 # coding: utf-8
"""Find the path to lightgbm dynamic library files.""" """Find the path to LightGBM dynamic library files."""
import os import os
from platform import system from platform import system
...@@ -7,17 +7,19 @@ from platform import system ...@@ -7,17 +7,19 @@ from platform import system
def find_lib_path(): def find_lib_path():
"""Find the path to LightGBM library files. """Find the path to LightGBM library files.
Returns Returns
------- -------
lib_path: list(string) lib_path: list of strings
List of all found library path to LightGBM List of all found library paths to LightGBM.
""" """
if os.environ.get('LIGHTGBM_BUILD_DOC', False): if os.environ.get('LIGHTGBM_BUILD_DOC', False):
# we don't need lib_lightgbm while building docs # we don't need lib_lightgbm while building docs
return [] return []
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
dll_path = [curr_path, os.path.join(curr_path, '../../'), dll_path = [curr_path,
os.path.join(curr_path, '../../'),
os.path.join(curr_path, 'compile'), os.path.join(curr_path, 'compile'),
os.path.join(curr_path, '../compile'), os.path.join(curr_path, '../compile'),
os.path.join(curr_path, '../../lib/')] os.path.join(curr_path, '../../lib/')]
...@@ -32,5 +34,5 @@ def find_lib_path(): ...@@ -32,5 +34,5 @@ def find_lib_path():
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
if not lib_path: if not lib_path:
dll_path = [os.path.realpath(p) for p in dll_path] dll_path = [os.path.realpath(p) for p in dll_path]
raise Exception('Cannot find lightgbm library in following paths: ' + '\n'.join(dll_path)) raise Exception('Cannot find lightgbm library file in following paths:\n' + '\n'.join(dll_path))
return lib_path return lib_path
# coding: utf-8 # coding: utf-8
# pylint: disable = C0103 # pylint: disable = C0103
"""Plotting Library.""" """Plotting library."""
from __future__ import absolute_import from __future__ import absolute_import
import warnings import warnings
...@@ -15,8 +15,8 @@ from .compat import (MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED, LGBMDeprecationWa ...@@ -15,8 +15,8 @@ from .compat import (MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED, LGBMDeprecationWa
from .sklearn import LGBMModel from .sklearn import LGBMModel
def _check_not_tuple_of_2_elements(obj, obj_name='obj'):
    """Raise TypeError unless ``obj`` is a tuple of exactly 2 elements.

    Parameters
    ----------
    obj : object
        Object to validate.
    obj_name : string, optional (default='obj')
        Name used in the error message.
    """
    is_pair = isinstance(obj, tuple) and len(obj) == 2
    if not is_pair:
        raise TypeError('%s must be a tuple of 2 elements.' % obj_name)
...@@ -63,7 +63,7 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -63,7 +63,7 @@ def plot_importance(booster, ax=None, height=0.2,
Figure size. Figure size.
grid : bool, optional (default=True) grid : bool, optional (default=True)
Whether to add a grid for axes. Whether to add a grid for axes.
**kwargs : other parameters **kwargs
Other parameters passed to ``ax.barh()``. Other parameters passed to ``ax.barh()``.
Returns Returns
...@@ -96,7 +96,7 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -96,7 +96,7 @@ def plot_importance(booster, ax=None, height=0.2,
if ax is None: if ax is None:
if figsize is not None: if figsize is not None:
check_not_tuple_of_2_elements(figsize, 'figsize') _check_not_tuple_of_2_elements(figsize, 'figsize')
_, ax = plt.subplots(1, 1, figsize=figsize) _, ax = plt.subplots(1, 1, figsize=figsize)
ylocs = np.arange(len(values)) ylocs = np.arange(len(values))
...@@ -109,13 +109,13 @@ def plot_importance(booster, ax=None, height=0.2, ...@@ -109,13 +109,13 @@ def plot_importance(booster, ax=None, height=0.2,
ax.set_yticklabels(labels) ax.set_yticklabels(labels)
if xlim is not None: if xlim is not None:
check_not_tuple_of_2_elements(xlim, 'xlim') _check_not_tuple_of_2_elements(xlim, 'xlim')
else: else:
xlim = (0, max(values) * 1.1) xlim = (0, max(values) * 1.1)
ax.set_xlim(xlim) ax.set_xlim(xlim)
if ylim is not None: if ylim is not None:
check_not_tuple_of_2_elements(ylim, 'ylim') _check_not_tuple_of_2_elements(ylim, 'ylim')
else: else:
ylim = (-1, len(values)) ylim = (-1, len(values))
ax.set_ylim(ylim) ax.set_ylim(ylim)
...@@ -194,7 +194,7 @@ def plot_metric(booster, metric=None, dataset_names=None, ...@@ -194,7 +194,7 @@ def plot_metric(booster, metric=None, dataset_names=None,
if ax is None: if ax is None:
if figsize is not None: if figsize is not None:
check_not_tuple_of_2_elements(figsize, 'figsize') _check_not_tuple_of_2_elements(figsize, 'figsize')
_, ax = plt.subplots(1, 1, figsize=figsize) _, ax = plt.subplots(1, 1, figsize=figsize)
if dataset_names is None: if dataset_names is None:
...@@ -229,13 +229,13 @@ def plot_metric(booster, metric=None, dataset_names=None, ...@@ -229,13 +229,13 @@ def plot_metric(booster, metric=None, dataset_names=None,
ax.legend(loc='best') ax.legend(loc='best')
if xlim is not None: if xlim is not None:
check_not_tuple_of_2_elements(xlim, 'xlim') _check_not_tuple_of_2_elements(xlim, 'xlim')
else: else:
xlim = (0, num_iteration) xlim = (0, num_iteration)
ax.set_xlim(xlim) ax.set_xlim(xlim)
if ylim is not None: if ylim is not None:
check_not_tuple_of_2_elements(ylim, 'ylim') _check_not_tuple_of_2_elements(ylim, 'ylim')
else: else:
range_result = max_result - min_result range_result = max_result - min_result
ylim = (min_result - range_result * 0.2, max_result + range_result * 0.2) ylim = (min_result - range_result * 0.2, max_result + range_result * 0.2)
...@@ -270,7 +270,7 @@ def _to_graphviz(tree_info, show_info, feature_names, precision=None, **kwargs): ...@@ -270,7 +270,7 @@ def _to_graphviz(tree_info, show_info, feature_names, precision=None, **kwargs):
if precision is not None and not isinstance(value, string_type) else str(value) if precision is not None and not isinstance(value, string_type) else str(value)
def add(root, parent=None, decision=None): def add(root, parent=None, decision=None):
"""recursively add node or edge""" """Recursively add node or edge."""
if 'split_index' in root: # non-leaf if 'split_index' in root: # non-leaf
name = 'split{0}'.format(root['split_index']) name = 'split{0}'.format(root['split_index'])
if feature_names is not None: if feature_names is not None:
...@@ -322,7 +322,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, ...@@ -322,7 +322,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None,
Parameters Parameters
---------- ----------
booster : Booster or LGBMModel booster : Booster or LGBMModel
Booster or LGBMModel instance. Booster or LGBMModel instance to be converted.
tree_index : int, optional (default=0) tree_index : int, optional (default=0)
The index of a target tree to convert. The index of a target tree to convert.
show_info : list of strings or None, optional (default=None) show_info : list of strings or None, optional (default=None)
...@@ -330,7 +330,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None, ...@@ -330,7 +330,7 @@ def create_tree_digraph(booster, tree_index=0, show_info=None, precision=None,
Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'. Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'.
precision : int or None, optional (default=None) precision : int or None, optional (default=None)
Used to restrict the display of floating point values to a certain precision. Used to restrict the display of floating point values to a certain precision.
**kwargs : other parameters **kwargs
Other parameters passed to ``Digraph`` constructor. Other parameters passed to ``Digraph`` constructor.
Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters. Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters.
...@@ -407,7 +407,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None, ...@@ -407,7 +407,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None,
Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'. Possible values of list items: 'split_gain', 'internal_value', 'internal_count', 'leaf_count'.
precision : int or None, optional (default=None) precision : int or None, optional (default=None)
Used to restrict the display of floating point values to a certain precision. Used to restrict the display of floating point values to a certain precision.
**kwargs : other parameters **kwargs
Other parameters passed to ``Digraph`` constructor. Other parameters passed to ``Digraph`` constructor.
Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters. Check https://graphviz.readthedocs.io/en/stable/api.html#digraph for the full list of supported parameters.
...@@ -433,7 +433,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None, ...@@ -433,7 +433,7 @@ def plot_tree(booster, ax=None, tree_index=0, figsize=None,
if ax is None: if ax is None:
if figsize is not None: if figsize is not None:
check_not_tuple_of_2_elements(figsize, 'figsize') _check_not_tuple_of_2_elements(figsize, 'figsize')
_, ax = plt.subplots(1, 1, figsize=figsize) _, ax = plt.subplots(1, 1, figsize=figsize)
graph = create_tree_digraph(booster=booster, tree_index=tree_index, graph = create_tree_digraph(booster=booster, tree_index=tree_index,
......
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, W0105, C0111, C0301 # pylint: disable = invalid-name, W0105, C0111, C0301
"""Scikit-Learn Wrapper interface for LightGBM.""" """Scikit-learn wrapper interface for LightGBM."""
from __future__ import absolute_import from __future__ import absolute_import
import numpy as np import numpy as np
...@@ -16,8 +16,11 @@ from .engine import train ...@@ -16,8 +16,11 @@ from .engine import train
def _objective_function_wrapper(func): def _objective_function_wrapper(func):
"""Decorate an objective function """Decorate an objective function.
Note: for multi-class task, the y_pred is group by class_id first, then group by row_id.
Note
----
For multi-class task, the y_pred is group by class_id first, then group by row_id.
If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i] If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i]
and you should group grad and hess in this way as well. and you should group grad and hess in this way as well.
...@@ -25,9 +28,10 @@ def _objective_function_wrapper(func): ...@@ -25,9 +28,10 @@ def _objective_function_wrapper(func):
---------- ----------
func : callable func : callable
Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group): Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group):
y_true : array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
group : array-like group : array-like
Group/query data, used for ranking task. Group/query data, used for ranking task.
...@@ -38,14 +42,13 @@ def _objective_function_wrapper(func): ...@@ -38,14 +42,13 @@ def _objective_function_wrapper(func):
The new objective function as expected by ``lightgbm.engine.train``. The new objective function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``: The signature is ``new_func(preds, dataset)``:
preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
dataset : ``dataset`` dataset : Dataset
The training set from which the labels will be extracted using The training set from which the labels will be extracted using ``dataset.get_label()``.
``dataset.get_label()``.
""" """
def inner(preds, dataset): def inner(preds, dataset):
"""internal function""" """Call passed function with appropriate arguments."""
labels = dataset.get_label() labels = dataset.get_label()
argc = argc_(func) argc = argc_(func)
if argc == 2: if argc == 2:
...@@ -76,24 +79,27 @@ def _objective_function_wrapper(func): ...@@ -76,24 +79,27 @@ def _objective_function_wrapper(func):
def _eval_function_wrapper(func): def _eval_function_wrapper(func):
"""Decorate an eval function """Decorate an eval function.
Note: for multi-class task, the y_pred is group by class_id first, then group by row_id.
Note
----
For multi-class task, the y_pred is group by class_id first, then group by row_id.
If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i]. If you want to get i-th row y_pred in j-th class, the access way is y_pred[j * num_data + i].
Parameters Parameters
---------- ----------
func : callable func : callable
Expects a callable with following functions: Expects a callable with following signatures:
``func(y_true, y_pred)``, ``func(y_true, y_pred)``,
``func(y_true, y_pred, weight)`` ``func(y_true, y_pred, weight)``
or ``func(y_true, y_pred, weight, group)`` or ``func(y_true, y_pred, weight, group)``
and return (eval_name->str, eval_result->float, is_bigger_better->Bool): and returns (eval_name->string, eval_result->float, is_bigger_better->bool):
y_true : array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
weight : array_like of shape = [n_samples] weight : array-like of shape = [n_samples]
The weight of samples. The weight of samples.
group : array-like group : array-like
Group/query data, used for ranking task. Group/query data, used for ranking task.
...@@ -104,14 +110,13 @@ def _eval_function_wrapper(func): ...@@ -104,14 +110,13 @@ def _eval_function_wrapper(func):
The new eval function as expected by ``lightgbm.engine.train``. The new eval function as expected by ``lightgbm.engine.train``.
The signature is ``new_func(preds, dataset)``: The signature is ``new_func(preds, dataset)``:
preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
dataset : ``dataset`` dataset : Dataset
The training set from which the labels will be extracted using The training set from which the labels will be extracted using ``dataset.get_label()``.
``dataset.get_label()``.
""" """
def inner(preds, dataset): def inner(preds, dataset):
"""internal function""" """Call passed function with appropriate arguments."""
labels = dataset.get_label() labels = dataset.get_label()
argc = argc_(func) argc = argc_(func)
if argc == 2: if argc == 2:
...@@ -128,18 +133,18 @@ def _eval_function_wrapper(func): ...@@ -128,18 +133,18 @@ def _eval_function_wrapper(func):
class LGBMModel(_LGBMModelBase): class LGBMModel(_LGBMModelBase):
"""Implementation of the scikit-learn API for LightGBM.""" """Implementation of the scikit-learn API for LightGBM."""
def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1, def __init__(self, boosting_type='gbdt', num_leaves=31, max_depth=-1,
learning_rate=0.1, n_estimators=100, learning_rate=0.1, n_estimators=100,
subsample_for_bin=200000, objective=None, class_weight=None, subsample_for_bin=200000, objective=None, class_weight=None,
min_split_gain=0., min_child_weight=1e-3, min_child_samples=20, min_split_gain=0., min_child_weight=1e-3, min_child_samples=20,
subsample=1., subsample_freq=0, colsample_bytree=1., subsample=1., subsample_freq=0, colsample_bytree=1.,
reg_alpha=0., reg_lambda=0., random_state=None, reg_alpha=0., reg_lambda=0., random_state=None,
n_jobs=-1, silent=True, importance_type='split', **kwargs): n_jobs=-1, silent=True, importance_type='split', **kwargs):
"""Construct a gradient boosting model. r"""Construct a gradient boosting model.
Parameters Parameters
---------- ----------
boosting_type : string, optional (default="gbdt") boosting_type : string, optional (default='gbdt')
'gbdt', traditional Gradient Boosting Decision Tree. 'gbdt', traditional Gradient Boosting Decision Tree.
'dart', Dropouts meet Multiple Additive Regression Trees. 'dart', Dropouts meet Multiple Additive Regression Trees.
'goss', Gradient-based One-Side Sampling. 'goss', Gradient-based One-Side Sampling.
...@@ -168,14 +173,14 @@ class LGBMModel(_LGBMModelBase): ...@@ -168,14 +173,14 @@ class LGBMModel(_LGBMModelBase):
The 'balanced' mode uses the values of y to automatically adjust weights The 'balanced' mode uses the values of y to automatically adjust weights
inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``.
If None, all classes are supposed to have weight one. If None, all classes are supposed to have weight one.
Note that these weights will be multiplied with ``sample_weight`` (passed through the fit method) Note, that these weights will be multiplied with ``sample_weight`` (passed through the ``fit`` method)
if ``sample_weight`` is specified. if ``sample_weight`` is specified.
min_split_gain : float, optional (default=0.) min_split_gain : float, optional (default=0.)
Minimum loss reduction required to make a further partition on a leaf node of the tree. Minimum loss reduction required to make a further partition on a leaf node of the tree.
min_child_weight : float, optional (default=1e-3) min_child_weight : float, optional (default=1e-3)
Minimum sum of instance weight(hessian) needed in a child(leaf). Minimum sum of instance weight (hessian) needed in a child (leaf).
min_child_samples : int, optional (default=20) min_child_samples : int, optional (default=20)
Minimum number of data need in a child(leaf). Minimum number of data needed in a child (leaf).
subsample : float, optional (default=1.) subsample : float, optional (default=1.)
Subsample ratio of the training instance. Subsample ratio of the training instance.
subsample_freq : int, optional (default=0) subsample_freq : int, optional (default=0)
...@@ -195,14 +200,15 @@ class LGBMModel(_LGBMModelBase): ...@@ -195,14 +200,15 @@ class LGBMModel(_LGBMModelBase):
Whether to print messages while running boosting. Whether to print messages while running boosting.
importance_type : string, optional (default='split') importance_type : string, optional (default='split')
The type of feature importance to be filled into ``feature_importances_``. The type of feature importance to be filled into ``feature_importances_``.
If "split", result contains numbers of times the feature is used in a model. If 'split', result contains numbers of times the feature is used in a model.
If "gain", result contains total gains of splits which use the feature. If 'gain', result contains total gains of splits which use the feature.
**kwargs : other parameters **kwargs
Other parameters for the model.
Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters. Check http://lightgbm.readthedocs.io/en/latest/Parameters.html for more parameters.
Note Note
---- ----
\\*\\*kwargs is not supported in sklearn, it may cause unexpected issues. \*\*kwargs is not supported in sklearn, it may cause unexpected issues.
Attributes Attributes
---------- ----------
...@@ -227,8 +233,8 @@ class LGBMModel(_LGBMModelBase): ...@@ -227,8 +233,8 @@ class LGBMModel(_LGBMModelBase):
Note Note
---- ----
A custom objective function can be provided for the ``objective`` A custom objective function can be provided for the ``objective`` parameter.
parameter. In this case, it should have the signature In this case, it should have the signature
``objective(y_true, y_pred) -> grad, hess`` or ``objective(y_true, y_pred) -> grad, hess`` or
``objective(y_true, y_pred, group) -> grad, hess``: ``objective(y_true, y_pred, group) -> grad, hess``:
...@@ -282,12 +288,37 @@ class LGBMModel(_LGBMModelBase): ...@@ -282,12 +288,37 @@ class LGBMModel(_LGBMModelBase):
self.set_params(**kwargs) self.set_params(**kwargs)
def get_params(self, deep=True):
    """Get parameters for this estimator.

    Parameters
    ----------
    deep : bool, optional (default=True)
        If True, will return the parameters for this estimator and
        contained subobjects that are estimators.

    Returns
    -------
    params : dict
        Parameter names mapped to their values.
    """
    # Start from the scikit-learn parameters, then overlay the extra
    # keyword arguments stored by set_params in _other_params.
    merged = super(LGBMModel, self).get_params(deep=deep)
    merged.update(self._other_params)
    return merged
# minor change to support `**kwargs` # minor change to support `**kwargs`
def set_params(self, **params): def set_params(self, **params):
"""Set the parameters of this estimator.
Parameters
----------
**params
Parameter names with their new values.
Returns
-------
self : object
Returns self.
"""
for key, value in params.items(): for key, value in params.items():
setattr(self, key, value) setattr(self, key, value)
if hasattr(self, '_' + key): if hasattr(self, '_' + key):
...@@ -340,10 +371,10 @@ class LGBMModel(_LGBMModelBase): ...@@ -340,10 +371,10 @@ class LGBMModel(_LGBMModelBase):
If there's more than one, will check all of them. But the training data is ignored anyway. If there's more than one, will check all of them. But the training data is ignored anyway.
verbose : bool, optional (default=True) verbose : bool, optional (default=True)
If True and an evaluation set is used, writes the evaluation progress. If True and an evaluation set is used, writes the evaluation progress.
feature_name : list of strings or 'auto', optional (default="auto") feature_name : list of strings or 'auto', optional (default='auto')
Feature names. Feature names.
If 'auto' and data is pandas DataFrame, data columns names are used. If 'auto' and data is pandas DataFrame, data columns names are used.
categorical_feature : list of strings or int, or 'auto', optional (default="auto") categorical_feature : list of strings or int, or 'auto', optional (default='auto')
Categorical features. Categorical features.
If list of int, interpreted as indices. If list of int, interpreted as indices.
If list of strings, interpreted as feature names (need to specify ``feature_name`` as well). If list of strings, interpreted as feature names (need to specify ``feature_name`` as well).
...@@ -362,15 +393,15 @@ class LGBMModel(_LGBMModelBase): ...@@ -362,15 +393,15 @@ class LGBMModel(_LGBMModelBase):
Note Note
---- ----
Custom eval function expects a callable with following functions: Custom eval function expects a callable with following signatures:
``func(y_true, y_pred)``, ``func(y_true, y_pred, weight)`` or ``func(y_true, y_pred)``, ``func(y_true, y_pred, weight)`` or
``func(y_true, y_pred, weight, group)``. ``func(y_true, y_pred, weight, group)``
Returns (eval_name, eval_result, is_bigger_better) or and returns (eval_name, eval_result, is_bigger_better) or
list of (eval_name, eval_result, is_bigger_better) list of (eval_name, eval_result, is_bigger_better):
y_true : array-like of shape = [n_samples] y_true : array-like of shape = [n_samples]
The target values. The target values.
y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class) y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
The predicted values. The predicted values.
weight : array-like of shape = [n_samples] weight : array-like of shape = [n_samples]
The weight of samples. The weight of samples.
...@@ -539,7 +570,8 @@ class LGBMModel(_LGBMModelBase): ...@@ -539,7 +570,8 @@ class LGBMModel(_LGBMModelBase):
like SHAP interaction values, like SHAP interaction values,
you can install shap package (https://github.com/slundberg/shap). you can install shap package (https://github.com/slundberg/shap).
**kwargs : other parameters for the prediction **kwargs
Other parameters for the prediction.
Returns Returns
------- -------
...@@ -629,7 +661,7 @@ class LGBMRegressor(LGBMModel, _LGBMRegressorBase): ...@@ -629,7 +661,7 @@ class LGBMRegressor(LGBMModel, _LGBMRegressorBase):
eval_set=None, eval_names=None, eval_sample_weight=None, eval_set=None, eval_names=None, eval_sample_weight=None,
eval_init_score=None, eval_metric=None, early_stopping_rounds=None, eval_init_score=None, eval_metric=None, early_stopping_rounds=None,
verbose=True, feature_name='auto', categorical_feature='auto', callbacks=None): verbose=True, feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
super(LGBMRegressor, self).fit(X, y, sample_weight=sample_weight, super(LGBMRegressor, self).fit(X, y, sample_weight=sample_weight,
init_score=init_score, eval_set=eval_set, init_score=init_score, eval_set=eval_set,
eval_names=eval_names, eval_names=eval_names,
...@@ -656,6 +688,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -656,6 +688,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
eval_class_weight=None, eval_init_score=None, eval_metric=None, eval_class_weight=None, eval_init_score=None, eval_metric=None,
early_stopping_rounds=None, verbose=True, early_stopping_rounds=None, verbose=True,
feature_name='auto', categorical_feature='auto', callbacks=None): feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
_LGBMAssertAllFinite(y) _LGBMAssertAllFinite(y)
_LGBMCheckClassificationTargets(y) _LGBMCheckClassificationTargets(y)
self._le = _LGBMLabelEncoder().fit(y) self._le = _LGBMLabelEncoder().fit(y)
...@@ -704,6 +737,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -704,6 +737,7 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
def predict(self, X, raw_score=False, num_iteration=None, def predict(self, X, raw_score=False, num_iteration=None,
pred_leaf=False, pred_contrib=False, **kwargs): pred_leaf=False, pred_contrib=False, **kwargs):
"""Docstring is inherited from the LGBMModel."""
result = self.predict_proba(X, raw_score, num_iteration, result = self.predict_proba(X, raw_score, num_iteration,
pred_leaf, pred_contrib, **kwargs) pred_leaf, pred_contrib, **kwargs)
if raw_score or pred_leaf or pred_contrib: if raw_score or pred_leaf or pred_contrib:
...@@ -739,7 +773,8 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase): ...@@ -739,7 +773,8 @@ class LGBMClassifier(LGBMModel, _LGBMClassifierBase):
like SHAP interaction values, like SHAP interaction values,
you can install shap package (https://github.com/slundberg/shap). you can install shap package (https://github.com/slundberg/shap).
**kwargs : other parameters for the prediction **kwargs
Other parameters for the prediction.
Returns Returns
------- -------
...@@ -781,6 +816,7 @@ class LGBMRanker(LGBMModel): ...@@ -781,6 +816,7 @@ class LGBMRanker(LGBMModel):
eval_init_score=None, eval_group=None, eval_metric=None, eval_init_score=None, eval_group=None, eval_metric=None,
eval_at=[1], early_stopping_rounds=None, verbose=True, eval_at=[1], early_stopping_rounds=None, verbose=True,
feature_name='auto', categorical_feature='auto', callbacks=None): feature_name='auto', categorical_feature='auto', callbacks=None):
"""Docstring is inherited from the LGBMModel."""
# check group data # check group data
if group is None: if group is None:
raise ValueError("Should set group for ranking task") raise ValueError("Should set group for ranking task")
......
...@@ -16,7 +16,8 @@ def find_lib_path(): ...@@ -16,7 +16,8 @@ def find_lib_path():
return [] return []
curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__))) curr_path = os.path.dirname(os.path.abspath(os.path.expanduser(__file__)))
dll_path = [curr_path, os.path.join(curr_path, '../../'), dll_path = [curr_path,
os.path.join(curr_path, '../../'),
os.path.join(curr_path, '../../python-package/lightgbm/compile'), os.path.join(curr_path, '../../python-package/lightgbm/compile'),
os.path.join(curr_path, '../../python-package/compile'), os.path.join(curr_path, '../../python-package/compile'),
os.path.join(curr_path, '../../lib/')] os.path.join(curr_path, '../../lib/')]
...@@ -31,7 +32,7 @@ def find_lib_path(): ...@@ -31,7 +32,7 @@ def find_lib_path():
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)] lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
if not lib_path: if not lib_path:
dll_path = [os.path.realpath(p) for p in dll_path] dll_path = [os.path.realpath(p) for p in dll_path]
raise Exception('Cannot find lightgbm library in following paths: ' + '\n'.join(dll_path)) raise Exception('Cannot find lightgbm library file in following paths:\n' + '\n'.join(dll_path))
return lib_path return lib_path
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment