Commit dd425973 authored by wxchan's avatar wxchan Committed by Guolin Ke
Browse files

python code style with pep8 (#161)

* format python code with pep8

* **DO NOT MERGE** deliberately break rules to see what will happen during check

* Revert "**DO NOT MERGE** deliberately break rules to see what will happen during check"

This reverts commit 0db93cd7a43c7efa43a2112ada43d46c6f9115d9.

* fix format in test.py

* add docs for pep-8
parent c4778e73
......@@ -13,8 +13,9 @@ before_install:
- conda update -q conda
install:
- sudo apt-get install -y libopenmpi-dev openmpi-bin build-essential
- sudo apt-get install -y libopenmpi-dev openmpi-bin build-essential
- conda install --yes atlas numpy scipy scikit-learn
- pip install pep8
script:
......@@ -23,9 +24,9 @@ script:
- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
- cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
- cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py
- cd $TRAVIS_BUILD_DIR
- cd $TRAVIS_BUILD_DIR && pep8 --ignore=E501 .
- rm -rf build && mkdir build && cd build && cmake -DUSE_MPI=ON ..&& make -j
- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
- cd $TRAVIS_BUILD_DIR/tests/c_api_test && python test.py
- cd $TRAVIS_BUILD_DIR/python-package && python setup.py install
- cd $TRAVIS_BUILD_DIR/tests/python_package_test && python test_basic.py && python test_engine.py && python test_sklearn.py
......
......@@ -27,15 +27,15 @@ lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train,
# specify your configurations as a dict
params = {
'boosting_type' : 'gbdt',
'objective' : 'binary',
'metric' : 'binary_logloss',
'num_leaves' : 31,
'learning_rate' : 0.05,
'feature_fraction' : 0.9,
'bagging_fraction' : 0.8,
'boosting_type': 'gbdt',
'objective': 'binary',
'metric': 'binary_logloss',
'num_leaves': 31,
'learning_rate': 0.05,
'feature_fraction': 0.9,
'bagging_fraction': 0.8,
'bagging_freq': 5,
'verbose' : 0
'verbose': 0
}
# generate a feature name
......@@ -46,7 +46,7 @@ print('Start training...')
gbm = lgb.train(params,
lgb_train,
num_boost_round=10,
valid_sets=lgb_train, # eval training data
valid_sets=lgb_train, # eval training data
feature_name=feature_name,
categorical_feature=[21])
......@@ -88,10 +88,11 @@ gbm = lgb.train(params,
num_boost_round=10,
init_model=gbm,
valid_sets=lgb_eval,
callbacks=[lgb.reset_parameter(bagging_fraction=[0.7]*5+[0.6]*5)])
callbacks=[lgb.reset_parameter(bagging_fraction=[0.7] * 5 + [0.6] * 5)])
print('Finish 30 - 40 rounds with changing bagging_fraction...')
# self-defined objective function
# f(preds: array, train_data: Dataset) -> grad: array, hess: array
# log likelihood loss
......@@ -102,6 +103,7 @@ def loglikelood(preds, train_data):
hess = preds * (1. - preds)
return grad, hess
# self-defined eval metric
# f(preds: array, train_data: Dataset) -> name: string, value: array, is_higher_better: bool
# binary error
......@@ -109,6 +111,7 @@ def binary_error(preds, train_data):
labels = train_data.get_label()
return 'error', np.mean(labels != (preds > 0.5)), False
gbm = lgb.train(params,
lgb_train,
num_boost_round=10,
......@@ -120,6 +123,8 @@ gbm = lgb.train(params,
print('Finish 40 - 50 rounds with self-defined objective function and eval metric...')
print('Start a new training job...')
# callback
def reset_metrics():
def callback(env):
......@@ -131,6 +136,7 @@ def reset_metrics():
callback.order = 0
return callback
gbm = lgb.train(params,
lgb_train,
num_boost_round=10,
......
......@@ -21,16 +21,16 @@ lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train)
# specify your configurations as a dict
params = {
'task' : 'train',
'boosting_type' : 'gbdt',
'objective' : 'regression',
'metric' : {'l2', 'auc'},
'num_leaves' : 31,
'learning_rate' : 0.05,
'feature_fraction' : 0.9,
'bagging_fraction' : 0.8,
'task': 'train',
'boosting_type': 'gbdt',
'objective': 'regression',
'metric': {'l2', 'auc'},
'num_leaves': 31,
'learning_rate': 0.05,
'feature_fraction': 0.9,
'bagging_fraction': 0.8,
'bagging_freq': 5,
'verbose' : 0
'verbose': 0
}
print('Start training...')
......
......@@ -31,3 +31,19 @@ Troubleshooting
setup.py directory, *never* absolute paths.
- **Solution 1**: please check `here <http://stackoverflow.com/questions/18085571/pip-install-error-setup-script-specifies-an-absolute-path>`__.
Developments
--------
The code style of python package follows `pep-8 <https://www.python.org/dev/peps/pep-0008/>`__. If you would like to make a contribution and not familiar with pep-8, please check the pep-8 style guide first. Otherwise, you won't pass the check. You should be careful about:
- E1 Indentation (check pep-8 link above)
- E202 whitespace before and after brackets
- E225 missing whitespace around operator
- E226 missing whitespace around arithmetic operator
- E261 at least two spaces before inline comment
- E301 expected 1 blank line in front of and at the end of a method
- E302 expected 2 blank lines in front of and at the end of a function or a class
You can ignore E501 (line too long).
......@@ -6,8 +6,6 @@ Contributors: https://github.com/Microsoft/LightGBM/graphs/contributors
from __future__ import absolute_import
import os
from .basic import Dataset, Booster
from .engine import train, cv
from .callback import print_evaluation, record_evaluation, reset_parameter, early_stopping
......@@ -23,4 +21,3 @@ __all__ = ['Dataset', 'Booster',
'train', 'cv',
'LGBMModel', 'LGBMRegressor', 'LGBMClassifier', 'LGBMRanker',
'print_evaluation', 'record_evaluation', 'reset_parameter', 'early_stopping']
......@@ -5,11 +5,11 @@
"""Wrapper c_api of LightGBM"""
from __future__ import absolute_import
import sys
import ctypes
import json
from tempfile import NamedTemporaryFile
import os
import sys
from tempfile import NamedTemporaryFile
import numpy as np
import scipy.sparse
......@@ -22,11 +22,13 @@ try:
except ImportError:
class Series(object):
pass
class DataFrame(object):
pass
IS_PY3 = (sys.version_info[0] == 3)
def _load_lib():
"""Load LightGBM Library."""
lib_path = find_lib_path()
......@@ -36,12 +38,15 @@ def _load_lib():
lib.LGBM_GetLastError.restype = ctypes.c_char_p
return lib
_LIB = _load_lib()
class LightGBMError(Exception):
"""Error throwed by LightGBM"""
pass
def _safe_call(ret):
"""Check the return value of C API call
Parameters
......@@ -52,6 +57,7 @@ def _safe_call(ret):
if ret != 0:
raise LightGBMError(_LIB.LGBM_GetLastError())
def is_str(s):
"""Check is a str or not"""
if IS_PY3:
......@@ -59,6 +65,7 @@ def is_str(s):
else:
return isinstance(s, basestring)
def is_numeric(obj):
"""Check is a number or not, include numpy number etc."""
try:
......@@ -67,19 +74,23 @@ def is_numeric(obj):
except:
return False
def is_numpy_object(data):
"""Check is numpy object"""
return type(data).__module__ == np.__name__
def is_numpy_1d_array(data):
"""Check is 1d numpy array"""
return isinstance(data, np.ndarray) and len(data.shape) == 1
def is_1d_list(data):
"""Check is 1d list"""
return isinstance(data, list) and \
(not data or isinstance(data[0], (int, float, bool)))
def list_to_1d_numpy(data, dtype=np.float32, name='list'):
"""convert to 1d numpy array"""
if is_numpy_1d_array(data):
......@@ -94,6 +105,7 @@ def list_to_1d_numpy(data, dtype=np.float32, name='list'):
else:
raise TypeError("Wrong type({}) for {}, should be list or numpy array".format(type(data).__name__, name))
def cfloat32_array_to_numpy(cptr, length):
"""Convert a ctypes float pointer array to a numpy array.
"""
......@@ -102,6 +114,7 @@ def cfloat32_array_to_numpy(cptr, length):
else:
raise RuntimeError('Expected float pointer')
def cint32_array_to_numpy(cptr, length):
"""Convert a ctypes float pointer array to a numpy array.
"""
......@@ -110,44 +123,52 @@ def cint32_array_to_numpy(cptr, length):
else:
raise RuntimeError('Expected int pointer')
def c_str(string):
"""Convert a python string to cstring."""
return ctypes.c_char_p(string.encode('utf-8'))
def c_array(ctype, values):
"""Convert a python array to c array."""
return (ctype * len(values))(*values)
def param_dict_to_str(data):
if data is None or not data:
return ""
pairs = []
for key, val in data.items():
if isinstance(val, (list, tuple, set)) or is_numpy_1d_array(val):
pairs.append(str(key)+'='+','.join(map(str, val)))
pairs.append(str(key) + '=' + ','.join(map(str, val)))
elif is_str(val) or isinstance(val, (int, float, bool)) or is_numeric(val):
pairs.append(str(key)+'='+str(val))
pairs.append(str(key) + '=' + str(val))
else:
raise TypeError('Unknown type of parameter:%s, got:%s'
% (key, type(val).__name__))
return ' '.join(pairs)
class _temp_file(object):
def __enter__(self):
with NamedTemporaryFile(prefix="lightgbm_tmp_", delete=True) as f:
self.name = f.name
return self
def __exit__(self, exc_type, exc_val, exc_tb):
if os.path.isfile(self.name):
os.remove(self.name)
def readlines(self):
with open(self.name, "r+") as f:
ret = f.readlines()
return ret
def writelines(self, lines):
with open(self.name, "w+") as f:
f.writelines(lines)
"""marco definition of data type in c_api of LightGBM"""
C_API_DTYPE_FLOAT32 = 0
C_API_DTYPE_FLOAT64 = 1
......@@ -168,6 +189,7 @@ FIELD_TYPE_MAPPER = {"label": C_API_DTYPE_FLOAT32,
"init_score": C_API_DTYPE_FLOAT32,
"group": C_API_DTYPE_INT32}
def c_float_array(data):
"""get pointer of float numpy array / list"""
if is_1d_list(data):
......@@ -186,6 +208,7 @@ def c_float_array(data):
raise TypeError("Unknown type({})".format(type(data).__name__))
return (ptr_data, type_data)
def c_int_array(data):
"""get pointer of int numpy array / list"""
if is_1d_list(data):
......@@ -204,6 +227,7 @@ def c_int_array(data):
raise TypeError("Unknown type({})".format(type(data).__name__))
return (ptr_data, type_data)
class _InnerPredictor(object):
"""
A _InnerPredictor of LightGBM.
......@@ -255,7 +279,6 @@ class _InnerPredictor(object):
if self.__is_manage_handle:
_safe_call(_LIB.LGBM_BoosterFree(self.handle))
def predict(self, data, num_iteration=-1,
raw_score=False, pred_leaf=False, data_has_header=False,
is_reshape=True):
......@@ -374,8 +397,7 @@ class _InnerPredictor(object):
predict_type,
num_iteration,
ctypes.byref(out_num_preds),
preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
))
preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
if n_preds != out_num_preds.value:
raise ValueError("Wrong length for predict results")
return preds, mat.shape[0]
......@@ -405,8 +427,7 @@ class _InnerPredictor(object):
predict_type,
num_iteration,
ctypes.byref(out_num_preds),
preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
))
preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
if n_preds != out_num_preds.value:
raise ValueError("Wrong length for predict results")
return preds, nrow
......@@ -436,17 +457,18 @@ class _InnerPredictor(object):
predict_type,
num_iteration,
ctypes.byref(out_num_preds),
preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
))
preds.ctypes.data_as(ctypes.POINTER(ctypes.c_double))))
if n_preds != out_num_preds.value:
raise ValueError("Wrong length for predict results")
return preds, nrow
PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int',
'int64': 'int', 'uint8': 'int', 'uint16': 'int',
'uint32': 'int', 'uint64': 'int', 'float16': 'float',
'float32': 'float', 'float64': 'float', 'bool': 'int'}
def _data_from_pandas(data):
if isinstance(data, DataFrame):
data_dtypes = data.dtypes
......@@ -459,6 +481,7 @@ def _data_from_pandas(data):
data = data.values.astype('float')
return data
def _label_from_pandas(label):
if isinstance(label, DataFrame):
if len(label.columns) > 1:
......@@ -469,6 +492,7 @@ def _label_from_pandas(label):
label = label.values.astype('float')
return label
class _InnerDataset(object):
"""_InnerDataset used in LightGBM.
_InnerDataset is a internal data structure that used by LightGBM.
......@@ -536,8 +560,8 @@ class _InnerDataset(object):
elif isinstance(name, int):
categorical_indices.add(name)
else:
raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature" \
.format(type(name).__name__, name))
raise TypeError("Wrong type({}) or unknown name({}) in categorical_feature"
.format(type(name).__name__, name))
params['categorical_column'] = sorted(categorical_indices)
......@@ -552,7 +576,7 @@ class _InnerDataset(object):
if is_str(data):
"""check data has header or not"""
if str(params.get("has_header", "")).lower() == "true" \
or str(params.get("header", "")).lower() == "true":
or str(params.get("header", "")).lower() == "true":
self.data_has_header = True
self.handle = ctypes.c_void_p()
_safe_call(_LIB.LGBM_DatasetCreateFromFile(
......@@ -927,6 +951,7 @@ class _InnerDataset(object):
ctypes.byref(ret)))
return ret.value
class Dataset(object):
"""High level Dataset used in LightGBM.
"""
......@@ -1140,7 +1165,6 @@ class Dataset(object):
"""
self._get_inner_dataset().save_binary(filename)
def set_label(self, label):
"""
Set label of Dataset
......@@ -1273,6 +1297,7 @@ class Dataset(object):
else:
raise LightGBMError("Cannot call num_feature before construct, please call it explicitly")
class Booster(object):
""""A Booster of LightGBM.
"""
......@@ -1397,7 +1422,7 @@ class Booster(object):
Name of validation data
"""
if not isinstance(data, Dataset):
raise TypeError('valid data should be Dataset instance, met {}'.format(type(train_set).__name__))
raise TypeError('valid data should be Dataset instance, met {}'.format(type(data).__name__))
if data._predictor is not self.__init_predictor:
raise LightGBMError("Add validation data failed, you should use same predictor for these data")
_safe_call(_LIB.LGBM_BoosterAddValidData(
......@@ -1578,8 +1603,8 @@ class Booster(object):
result: str
Evaluation result list.
"""
return [item for i in range(1, self.__num_dataset) \
for item in self.__inner_eval(self.name_valid_sets[i-1], i, feval)]
return [item for i in range(1, self.__num_dataset)
for item in self.__inner_eval(self.name_valid_sets[i - 1], i, feval)]
def save_model(self, filename, num_iteration=-1):
"""
......@@ -1684,6 +1709,7 @@ class Booster(object):
raise KeyError("importance_type must be split or gain")
dump_model = self.dump_model()
ret = [0] * (dump_model["max_feature_idx"] + 1)
def dfs(root):
if "split_feature" in root:
if importance_type == 'split':
......@@ -1773,7 +1799,7 @@ class Booster(object):
"""Get name of evals"""
tmp_out_len = ctypes.c_int64(0)
string_buffers = [ctypes.create_string_buffer(255) for i in range(self.__num_inner_eval)]
ptr_string_buffers = (ctypes.c_char_p*self.__num_inner_eval)(*map(ctypes.addressof, string_buffers))
ptr_string_buffers = (ctypes.c_char_p * self.__num_inner_eval)(*map(ctypes.addressof, string_buffers))
_safe_call(_LIB.LGBM_BoosterGetEvalNames(
self.handle,
ctypes.byref(tmp_out_len),
......
......@@ -3,6 +3,7 @@
from __future__ import absolute_import
import collections
class EarlyStopException(Exception):
"""Exception of early stopping.
Parameters
......@@ -14,6 +15,7 @@ class EarlyStopException(Exception):
super(EarlyStopException, self).__init__()
self.best_iteration = best_iteration
# Callback environment used by callbacks
CallbackEnv = collections.namedtuple(
"LightGBMCallbackEnv",
......@@ -24,6 +26,7 @@ CallbackEnv = collections.namedtuple(
"end_iteration",
"evaluation_result_list"])
def _format_eval_result(value, show_stdv=True):
"""format metric string"""
if len(value) == 4:
......@@ -58,8 +61,9 @@ def print_evaluation(period=1, show_stdv=True):
if not env.evaluation_result_list or period <= 0:
return
if (env.iteration + 1) % period == 0:
result = '\t'.join([_format_eval_result(x, show_stdv) \
for x in env.evaluation_result_list])
result = '\t'.join(
[_format_eval_result(x, show_stdv) for x in env.evaluation_result_list]
)
print('[%d]\t%s' % (env.iteration + 1, result))
callback.order = 10
return callback
......@@ -152,6 +156,7 @@ def early_stopping(stopping_rounds, verbose=True):
best_score = {}
best_iter = {}
best_msg = {}
def init(env):
"""internal function"""
if not env.evaluation_result_list:
......@@ -178,8 +183,11 @@ def early_stopping(stopping_rounds, verbose=True):
best_score[i] = score
best_iter[i] = env.iteration
if verbose:
best_msg[i] = '[%d]\t%s' % (env.iteration + 1, \
'\t'.join([_format_eval_result(x) for x in env.evaluation_result_list]))
best_msg[i] = '[%d]\t%s' % (
env.iteration + 1, '\t'.join(
[_format_eval_result(x) for x in env.evaluation_result_list]
)
)
else:
if env.iteration - best_iter[i] >= stopping_rounds:
if env.model is not None:
......
......@@ -9,6 +9,7 @@ import numpy as np
from .basic import LightGBMError, _InnerPredictor, Dataset, Booster, is_str
from . import callback
def train(params, train_set, num_boost_round=100,
valid_sets=None, valid_names=None,
fobj=None, feval=None, init_model=None,
......@@ -121,7 +122,7 @@ def train(params, train_set, num_boost_round=100,
if valid_names is not None and len(valid_names) > i:
name_valid_sets.append(valid_names[i])
else:
name_valid_sets.append('valid_'+str(i))
name_valid_sets.append('valid_' + str(i))
for valid_data in valid_sets:
valid_data._update_params(params)
"""process callbacks"""
......@@ -211,6 +212,7 @@ class CVBooster(object):
""""Evaluate the CVBooster for one iteration."""
return self.booster.eval_valid(feval)
try:
from sklearn.model_selection import StratifiedKFold
SKLEARN_StratifiedKFold = True
......@@ -221,6 +223,7 @@ except ImportError:
except ImportError:
SKLEARN_StratifiedKFold = False
def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=False, shuffle=True):
"""
Make an n-fold list of CVBooster from random indices.
......@@ -251,6 +254,7 @@ def _make_n_folds(full_data, nfold, params, seed, fpreproc=None, stratified=Fals
ret.append(CVBooster(train_set, valid_set, tparam))
return ret
def _agg_cv_result(raw_results):
"""
Aggregate cross-validation results.
......@@ -263,6 +267,7 @@ def _agg_cv_result(raw_results):
cvmap[one_line[1]].append(one_line[2])
return [('cv_agg', k, np.mean(v), metric_type[k], np.std(v)) for k, v in cvmap.items()]
def cv(params, train_set, num_boost_round=10, nfold=5, stratified=False,
shuffle=True, metrics=None, fobj=None, feval=None, init_model=None,
feature_name=None, categorical_feature=None,
......
......@@ -26,5 +26,5 @@ def find_lib_path():
lib_path = [p for p in dll_path if os.path.exists(p) and os.path.isfile(p)]
if not lib_path:
dll_path = [os.path.realpath(p) for p in dll_path]
raise Exception('Cannot find lightgbm Library in following paths: '+','.join(dll_path))
raise Exception('Cannot find lightgbm Library in following paths: ' + ','.join(dll_path))
return lib_path
......@@ -25,6 +25,7 @@ except ImportError:
LGBMRegressorBase = object
LGBMLabelEncoder = None
def _objective_function_wrapper(func):
"""Decorate an objective function
Note: for multi-class task, the y_pred is group by class_id first, then group by row_id
......@@ -62,7 +63,7 @@ def _objective_function_wrapper(func):
elif argc == 3:
grad, hess = func(labels, preds, dataset.get_group())
else:
raise TypeError("Self-defined objective function should have 2 or 3 arguments, got %d" %(argc))
raise TypeError("Self-defined objective function should have 2 or 3 arguments, got %d" % argc)
"""weighted for objective"""
weight = dataset.get_weight()
if weight is not None:
......@@ -83,6 +84,7 @@ def _objective_function_wrapper(func):
return grad, hess
return inner
def _eval_function_wrapper(func):
"""Decorate an eval function
Note: for multi-class task, the y_pred is group by class_id first, then group by row_id
......@@ -128,9 +130,10 @@ def _eval_function_wrapper(func):
elif argc == 4:
return func(labels, preds, dataset.get_weight(), dataset.get_group())
else:
raise TypeError("Self-defined eval function should have 2, 3 or 4 arguments, got %d" %(argc))
raise TypeError("Self-defined eval function should have 2, 3 or 4 arguments, got %d" % argc)
return inner
class LGBMModel(LGBMModelBase):
def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1,
......@@ -354,9 +357,9 @@ class LGBMModel(LGBMModelBase):
if hasattr(self, 'eval_at'):
params['ndcg_eval_at'] = self.eval_at
if self.fobj:
params['objective'] = 'None' # objective = nullptr for unknown objective
params['objective'] = 'None' # objective = nullptr for unknown objective
if 'label_gain' in params and params['label_gain'] is None:
del params['label_gain'] # use default of cli version
del params['label_gain'] # use default of cli version
if callable(eval_metric):
feval = _eval_function_wrapper(eval_metric)
......@@ -474,6 +477,7 @@ class LGBMModel(LGBMModelBase):
def feature_importance(self):
return self.feature_importance_
class LGBMRegressor(LGBMModel, LGBMRegressorBase):
def fit(self, X, y,
......@@ -495,6 +499,7 @@ class LGBMRegressor(LGBMModel, LGBMRegressorBase):
callbacks=callbacks)
return self
class LGBMClassifier(LGBMModel, LGBMClassifierBase):
def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1,
......@@ -595,6 +600,7 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
raise LightGBMError('No classes found. Need to call fit beforehand.')
return self.n_classes
class LGBMRanker(LGBMModel):
def __init__(self, boosting_type="gbdt", num_leaves=31, max_depth=-1,
......@@ -646,7 +652,7 @@ class LGBMRanker(LGBMModel):
elif len(eval_group) != len(eval_set):
raise ValueError("Length of eval_group should equal to eval_set")
elif (isinstance(eval_group, dict) and any(i not in eval_group or eval_group[i] is None for i in range(len(eval_group)))) \
or (isinstance(eval_group, list) and any(group is None for group in eval_group)):
or (isinstance(eval_group, list) and any(group is None for group in eval_group)):
raise ValueError("Should set group for all eval dataset for ranking task; if you use dict, the index should start from 0")
if eval_at is not None:
......
import sys
import os
# coding: utf-8
# pylint: skip-file
import ctypes
import collections
import os
import numpy as np
from scipy import sparse
def LoadDll():
if os.name == 'nt':
lib_path = '../../windows/x64/DLL/lib_lightgbm.dll'
......@@ -14,6 +15,7 @@ def LoadDll():
lib = ctypes.cdll.LoadLibrary(lib_path)
return lib
LIB = LoadDll()
LIB.LGBM_GetLastError.restype = ctypes.c_char_p
......@@ -27,25 +29,29 @@ dtype_int64 = 3
def c_array(ctype, values):
return (ctype * len(values))(*values)
def c_str(string):
return ctypes.c_char_p(string.encode('ascii'))
def test_load_from_file(filename, reference):
ref = None
if reference != None:
if reference is not None:
ref = reference
handle = ctypes.c_void_p()
LIB.LGBM_DatasetCreateFromFile(c_str(filename),
c_str('max_bin=15'),
ref, ctypes.byref(handle) )
LIB.LGBM_DatasetCreateFromFile(
c_str(filename),
c_str('max_bin=15'),
ref, ctypes.byref(handle))
print(LIB.LGBM_GetLastError())
num_data = ctypes.c_long()
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data) )
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
num_feature = ctypes.c_long()
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
print ('#data:%d #feature:%d' %(num_data.value, num_feature.value) )
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
return handle
def test_save_to_binary(handle, filename):
LIB.LGBM_DatasetSaveBinary(handle, c_str(filename))
......@@ -55,105 +61,113 @@ def test_load_from_csr(filename, reference):
label = []
inp = open(filename, 'r')
for line in inp.readlines():
data.append( [float(x) for x in line.split('\t')[1:]] )
label.append( float(line.split('\t')[0]) )
data.append([float(x) for x in line.split('\t')[1:]])
label.append(float(line.split('\t')[0]))
inp.close()
mat = np.array(data)
label = np.array(label, dtype=np.float32)
csr = sparse.csr_matrix(mat)
handle = ctypes.c_void_p()
ref = None
if reference != None:
if reference is not None:
ref = reference
LIB.LGBM_DatasetCreateFromCSR(c_array(ctypes.c_int, csr.indptr),
dtype_int32,
c_array(ctypes.c_int, csr.indices),
LIB.LGBM_DatasetCreateFromCSR(
c_array(ctypes.c_int, csr.indptr),
dtype_int32,
c_array(ctypes.c_int, csr.indices),
csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
dtype_float64,
len(csr.indptr),
dtype_float64,
len(csr.indptr),
len(csr.data),
csr.shape[1],
c_str('max_bin=15'),
ref,
ctypes.byref(handle) )
csr.shape[1],
c_str('max_bin=15'),
ref,
ctypes.byref(handle))
num_data = ctypes.c_long()
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data) )
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
num_feature = ctypes.c_long()
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
print ('#data:%d #feature:%d' %(num_data.value, num_feature.value) )
print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
return handle
def test_load_from_csc(filename, reference):
data = []
label = []
inp = open(filename, 'r')
for line in inp.readlines():
data.append( [float(x) for x in line.split('\t')[1:]] )
label.append( float(line.split('\t')[0]) )
data.append([float(x) for x in line.split('\t')[1:]])
label.append(float(line.split('\t')[0]))
inp.close()
mat = np.array(data)
label = np.array(label, dtype=np.float32)
csr = sparse.csc_matrix(mat)
handle = ctypes.c_void_p()
ref = None
if reference != None:
if reference is not None:
ref = reference
LIB.LGBM_DatasetCreateFromCSC(c_array(ctypes.c_int, csr.indptr),
dtype_int32,
c_array(ctypes.c_int, csr.indices),
LIB.LGBM_DatasetCreateFromCSC(
c_array(ctypes.c_int, csr.indptr),
dtype_int32,
c_array(ctypes.c_int, csr.indices),
csr.data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
dtype_float64,
len(csr.indptr),
dtype_float64,
len(csr.indptr),
len(csr.data),
csr.shape[0],
c_str('max_bin=15'),
ref,
ctypes.byref(handle) )
csr.shape[0],
c_str('max_bin=15'),
ref,
ctypes.byref(handle))
num_data = ctypes.c_long()
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data) )
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
num_feature = ctypes.c_long()
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
print ('#data:%d #feature:%d' %(num_data.value, num_feature.value) )
print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
return handle
def test_load_from_mat(filename, reference):
data = []
label = []
inp = open(filename, 'r')
for line in inp.readlines():
data.append( [float(x) for x in line.split('\t')[1:]] )
label.append( float(line.split('\t')[0]) )
data.append([float(x) for x in line.split('\t')[1:]])
label.append(float(line.split('\t')[0]))
inp.close()
mat = np.array(data)
data = np.array(mat.reshape(mat.size), copy=False)
label = np.array(label, dtype=np.float32)
handle = ctypes.c_void_p()
ref = None
if reference != None:
if reference is not None:
ref = reference
LIB.LGBM_DatasetCreateFromMat(data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
LIB.LGBM_DatasetCreateFromMat(data.ctypes.data_as(
ctypes.POINTER(ctypes.c_void_p)),
dtype_float64,
mat.shape[0],
mat.shape[1],
1,
c_str('max_bin=15'),
ref,
ctypes.byref(handle) )
c_str('max_bin=15'),
ref,
ctypes.byref(handle))
num_data = ctypes.c_long()
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data) )
LIB.LGBM_DatasetGetNumData(handle, ctypes.byref(num_data))
num_feature = ctypes.c_long()
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature) )
LIB.LGBM_DatasetGetNumFeature(handle, ctypes.byref(num_feature))
LIB.LGBM_DatasetSetField(handle, c_str('label'), c_array(ctypes.c_float, label), len(label), 0)
print ('#data:%d #feature:%d' %(num_data.value, num_feature.value) )
print('#data:%d #feature:%d' % (num_data.value, num_feature.value))
return handle
def test_free_dataset(handle):
LIB.LGBM_DatasetFree(handle)
def test_dataset():
train = test_load_from_file('../../examples/binary_classification/binary.train', None)
test = test_load_from_mat('../../examples/binary_classification/binary.test', train)
......@@ -164,8 +178,10 @@ def test_dataset():
test_free_dataset(test)
test_save_to_binary(train, 'train.binary.bin')
test_free_dataset(train)
train = test_load_from_file('train.binary.bin', None)
train = test_load_from_file('train.binary.bin', None)
test_free_dataset(train)
def test_booster():
train = test_load_from_mat('../../examples/binary_classification/binary.train', None)
test = test_load_from_mat('../../examples/binary_classification/binary.test', train)
......@@ -174,11 +190,11 @@ def test_booster():
LIB.LGBM_BoosterAddValidData(booster, test)
is_finished = ctypes.c_int(0)
for i in range(100):
LIB.LGBM_BoosterUpdateOneIter(booster,ctypes.byref(is_finished))
LIB.LGBM_BoosterUpdateOneIter(booster, ctypes.byref(is_finished))
result = np.array([0.0], dtype=np.float64)
out_len = ctypes.c_ulong(0)
LIB.LGBM_BoosterGetEval(booster, 0, ctypes.byref(out_len), result.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
print ('%d Iteration test AUC %f' %(i, result[0]))
print('%d Iteration test AUC %f' % (i, result[0]))
LIB.LGBM_BoosterSaveModel(booster, -1, c_str('model.txt'))
LIB.LGBM_BoosterFree(booster)
test_free_dataset(train)
......@@ -189,14 +205,15 @@ def test_booster():
data = []
inp = open('../../examples/binary_classification/binary.test', 'r')
for line in inp.readlines():
data.append( [float(x) for x in line.split('\t')[1:]] )
data.append([float(x) for x in line.split('\t')[1:]])
inp.close()
mat = np.array(data)
preb = np.zeros(mat.shape[0], dtype=np.float64)
num_preb = ctypes.c_long()
data = np.array(mat.reshape(mat.size), copy=False)
LIB.LGBM_BoosterPredictForMat(booster2,
data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
LIB.LGBM_BoosterPredictForMat(
booster2,
data.ctypes.data_as(ctypes.POINTER(ctypes.c_void_p)),
dtype_float64,
mat.shape[0],
mat.shape[1],
......@@ -205,8 +222,9 @@ def test_booster():
50,
ctypes.byref(num_preb),
preb.ctypes.data_as(ctypes.POINTER(ctypes.c_double)))
LIB.LGBM_BoosterPredictForFile(booster2,c_str('../../examples/binary_classification/binary.test'),0 , 0, 50, c_str('preb.txt'))
LIB.LGBM_BoosterPredictForFile(booster2, c_str('../../examples/binary_classification/binary.test'), 0, 0, 50, c_str('preb.txt'))
LIB.LGBM_BoosterFree(booster2)
test_dataset()
test_booster()
# coding: utf-8
# pylint: skip-file
import unittest, tempfile, os
import os
import tempfile
import unittest
import lightgbm as lgb
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import lightgbm as lgb
class TestBasic(unittest.TestCase):
......@@ -14,11 +18,11 @@ class TestBasic(unittest.TestCase):
valid_data = train_data.create_valid(X_test, label=y_test)
params = {
"objective" : "binary",
"metric" : "auc",
"min_data" : 1,
"num_leaves" : 15,
"verbose" : -1
"objective": "binary",
"metric": "auc",
"min_data": 1,
"num_leaves": 15,
"verbose": -1
}
bst = lgb.Booster(params, train_data)
bst.add_valid(valid_data, "valid_1")
......@@ -38,13 +42,14 @@ class TestBasic(unittest.TestCase):
self.assertEqual(len(pred_from_matr), len(pred_from_file))
for preds in zip(pred_from_matr, pred_from_file):
self.assertAlmostEqual(*preds, places=15)
#check saved model persistence
# check saved model persistence
bst = lgb.Booster(params, model_file="model.txt")
pred_from_model_file = bst.predict(X_test)
self.assertEqual(len(pred_from_matr), len(pred_from_model_file))
for preds in zip(pred_from_matr, pred_from_model_file):
self.assertAlmostEqual(*preds, places=15)
print("----------------------------------------------------------------------")
print("running test_basic.py")
unittest.main()
# coding: utf-8
# pylint: skip-file
import os, unittest, math, copy
import numpy as np
import copy
import math
import os
import unittest
import lightgbm as lgb
from sklearn.metrics import log_loss, mean_squared_error, mean_absolute_error
from sklearn.datasets import load_breast_cancer, load_boston, load_digits, load_iris
import numpy as np
from sklearn.datasets import (load_boston, load_breast_cancer, load_digits,
load_iris)
from sklearn.metrics import log_loss, mean_absolute_error, mean_squared_error
from sklearn.model_selection import train_test_split
# Prefer the C-accelerated cPickle on Python 2; fall back to the
# standard pickle module on Python 3 (where cPickle no longer exists).
# Catch only ImportError — a bare `except:` would also hide unrelated
# failures such as KeyboardInterrupt or SystemExit.
try:
    import cPickle as pickle
except ImportError:
    import pickle
def multi_logloss(y_true, y_pred):
    """Mean multiclass log loss of the true labels under predicted probabilities.

    y_true: sequence of integer class labels.
    y_pred: sequence of per-sample probability vectors, indexable by class.
    """
    losses = []
    for idx, label in enumerate(y_true):
        # negative log-likelihood of the true class for this sample
        losses.append(-math.log(y_pred[idx][label]))
    return np.mean(losses)
def test_template(params = {'objective' : 'regression', 'metric' : 'l2'},
def test_template(params={'objective': 'regression', 'metric': 'l2'},
X_y=load_boston(True), feval=mean_squared_error,
num_round=100, init_model=None, custom_eval=None,
early_stopping_rounds=10,
......@@ -23,7 +31,8 @@ def test_template(params = {'objective' : 'regression', 'metric' : 'l2'},
X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
lgb_train = lgb.Dataset(X_train, y_train, params=params)
lgb_eval = lgb.Dataset(X_test, y_test, reference=lgb_train, params=params)
if return_data: return lgb_train, lgb_eval
if return_data:
return lgb_train, lgb_eval
evals_result = {}
gbm = lgb.train(params, lgb_train,
num_boost_round=num_round,
......@@ -34,16 +43,19 @@ def test_template(params = {'objective' : 'regression', 'metric' : 'l2'},
evals_result=evals_result,
early_stopping_rounds=early_stopping_rounds,
init_model=init_model)
if return_model: return gbm
else: return evals_result, feval(y_test, gbm.predict(X_test, gbm.best_iteration))
if return_model:
return gbm
else:
return evals_result, feval(y_test, gbm.predict(X_test, gbm.best_iteration))
class TestEngine(unittest.TestCase):
def test_binary(self):
X_y= load_breast_cancer(True)
X_y = load_breast_cancer(True)
params = {
'objective' : 'binary',
'metric' : 'binary_logloss'
'objective': 'binary',
'metric': 'binary_logloss'
}
evals_result, ret = test_template(params, X_y, log_loss)
self.assertLess(ret, 0.15)
......@@ -58,9 +70,9 @@ class TestEngine(unittest.TestCase):
def test_multiclass(self):
X_y = load_digits(10, True)
params = {
'objective' : 'multiclass',
'metric' : 'multi_logloss',
'num_class' : 10
'objective': 'multiclass',
'metric': 'multi_logloss',
'num_class': 10
}
evals_result, ret = test_template(params, X_y, multi_logloss)
self.assertLess(ret, 0.2)
......@@ -68,8 +80,8 @@ class TestEngine(unittest.TestCase):
def test_continue_train_and_other(self):
params = {
'objective' : 'regression',
'metric' : 'l1'
'objective': 'regression',
'metric': 'l1'
}
model_name = 'model.txt'
gbm = test_template(params, num_round=20, return_model=True, early_stopping_rounds=-1)
......@@ -88,19 +100,19 @@ class TestEngine(unittest.TestCase):
def test_continue_train_multiclass(self):
X_y = load_iris(True)
params = {
'objective' : 'multiclass',
'metric' : 'multi_logloss',
'num_class' : 3
'objective': 'multiclass',
'metric': 'multi_logloss',
'num_class': 3
}
gbm = test_template(params, X_y, num_round=20, return_model=True, early_stopping_rounds=-1)
evals_result, ret = test_template(params, X_y, feval=multi_logloss,
num_round=80, init_model=gbm)
num_round=80, init_model=gbm)
self.assertLess(ret, 1.5)
self.assertAlmostEqual(min(evals_result['eval']['multi_logloss']), ret, places=5)
def test_cv(self):
lgb_train, _ = test_template(return_data=True)
lgb.cv({'verbose':0}, lgb_train, num_boost_round=20, nfold=5,
lgb.cv({'verbose': 0}, lgb_train, num_boost_round=20, nfold=5,
metrics='l1', verbose_eval=False)
def test_save_load_copy_pickle(self):
......@@ -123,6 +135,7 @@ class TestEngine(unittest.TestCase):
for ret in other_ret:
self.assertAlmostEqual(ret_origin, ret, places=5)
print("----------------------------------------------------------------------")
print("running test_engine.py")
unittest.main()
# coding: utf-8
# pylint: skip-file
import os, unittest
import numpy as np
import unittest
import lightgbm as lgb
from sklearn.metrics import log_loss, mean_squared_error
from sklearn.datasets import load_breast_cancer, load_boston, load_digits, load_svmlight_file
from sklearn.model_selection import train_test_split, GridSearchCV
import numpy as np
from sklearn.base import clone
from sklearn.datasets import (load_boston, load_breast_cancer, load_digits,
load_svmlight_file)
from sklearn.externals import joblib
from sklearn.metrics import log_loss, mean_squared_error
from sklearn.model_selection import GridSearchCV, train_test_split
def test_template(X_y=load_boston(True), model=lgb.LGBMRegressor,
feval=mean_squared_error, num_round=100,
custom_obj=None, predict_proba=False,
return_data=False, return_model=False):
X_train, X_test, y_train, y_test = train_test_split(*X_y, test_size=0.1, random_state=42)
if return_data: return X_train, X_test, y_train, y_test
arguments = {'n_estimators' : num_round, 'silent' : True}
if custom_obj: arguments['objective'] = custom_obj
if return_data:
return X_train, X_test, y_train, y_test
arguments = {'n_estimators': num_round, 'silent': True}
if custom_obj:
arguments['objective'] = custom_obj
gbm = model(**arguments)
gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=10, verbose=False)
if return_model: return gbm
else: return feval(y_test, gbm.predict_proba(X_test) if predict_proba else gbm.predict(X_test))
if return_model:
return gbm
elif predict_proba:
return feval(y_test, gbm.predict_proba(X_test))
else:
return feval(y_test, gbm.predict(X_test))
class TestSklearn(unittest.TestCase):
def test_binary(self):
X_y= load_breast_cancer(True)
X_y = load_breast_cancer(True)
ret = test_template(X_y, lgb.LGBMClassifier, log_loss, predict_proba=True)
self.assertLess(ret, 0.15)
......@@ -34,6 +44,7 @@ class TestSklearn(unittest.TestCase):
def test_multiclass(self):
X_y = load_digits(10, True)
def multi_error(y_true, y_pred):
    """Error rate: fraction of predictions that disagree with the labels."""
    # elementwise comparison when inputs are arrays; np.mean of the
    # resulting boolean mask gives the misclassification rate
    mismatches = y_true != y_pred
    return np.mean(mismatches)
ret = test_template(X_y, lgb.LGBMClassifier, multi_error)
......@@ -67,6 +78,7 @@ class TestSklearn(unittest.TestCase):
hess = y_pred * (1.0 - y_pred)
return grad, hess
X_y = load_digits(2, True)
def binary_error(y_test, y_pred):
    """Error rate of probability predictions thresholded at 0.5."""
    # a prediction counts as wrong when its thresholded class
    # (1 if prob > 0.5 else 0) differs from the true label
    mistakes = [int(prob > 0.5) != label for label, prob in zip(y_test, y_pred)]
    return np.mean(mistakes)
ret = test_template(X_y, lgb.LGBMClassifier, feval=binary_error, custom_obj=logregobj)
......@@ -81,7 +93,8 @@ class TestSklearn(unittest.TestCase):
def test_grid_search(self):
X_train, X_test, y_train, y_test = test_template(return_data=True)
params = {'boosting_type': ['dart', 'gbdt'],
'n_estimators': [15, 20], 'drop_rate':[0.1, 0.2]}
'n_estimators': [15, 20],
'drop_rate': [0.1, 0.2]}
gbm = GridSearchCV(lgb.LGBMRegressor(), params, cv=3)
gbm.fit(X_train, y_train)
self.assertIn(gbm.best_params_['n_estimators'], [15, 20])
......@@ -114,6 +127,7 @@ class TestSklearn(unittest.TestCase):
for preds in zip(pred_origin, pred_pickle):
self.assertAlmostEqual(*preds, places=5)
print("----------------------------------------------------------------------")
print("running test_sklearn.py")
unittest.main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment