Fix some PEP 8 check failures

5b539788 · Guolin Ke · 1a8c23ed · 5b539788 · 5b539788 · 5b539788
Commit 5b539788 authored Dec 01, 2016 by Guolin Ke
5 changed files
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -4,8 +4,6 @@ from __future__ import absolute_import
 import sys
 import os
 import ctypes
-import collections
-import re
 import tempfile

 import numpy as np
@@ -59,7 +57,7 @@ def is_1d_list(data):
    if not isinstance(data, list):
        return False
    if len(data) > 0:
-        if not isinstance(data[0], (int, float, bool) ):
+        if not isinstance(data[0], (int, float, bool)):
            return False
    return True

@@ -108,29 +106,29 @@ def param_dict_to_str(data):
        if is_str(val):
            pairs.append(str(key)+'='+str(val))
        elif isinstance(val, (list, tuple)):
-            pairs.append(str(key)+'='+','.join(map(str,val)))
+            pairs.append(str(key)+'='+','.join(map(str, val)))
        elif isinstance(val, (int, float, bool)):
            pairs.append(str(key)+'='+str(val))
        else:
-            raise TypeError('unknow type of parameter:%s , got:%s' %(key, type(val).__name__))
+            raise TypeError('unknow type of parameter:%s , got:%s' 
+                            % (key, type(val).__name__))
    return ' '.join(pairs)
 """marco definition of data type in c_api of LightGBM"""
-C_API_DTYPE_FLOAT32 =0
-C_API_DTYPE_FLOAT64 =1
-C_API_DTYPE_INT32   =2
-C_API_DTYPE_INT64   =3
+C_API_DTYPE_FLOAT32 = 0
+C_API_DTYPE_FLOAT64 = 1
+C_API_DTYPE_INT32 = 2
+C_API_DTYPE_INT64 = 3
 """Matric is row major in python"""
-C_API_IS_ROW_MAJOR  =1
+C_API_IS_ROW_MAJOR = 1

-C_API_PREDICT_NORMAL     =0
-C_API_PREDICT_RAW_SCORE  =1
-C_API_PREDICT_LEAF_INDEX =2
+C_API_PREDICT_NORMAL = 0
+C_API_PREDICT_RAW_SCORE = 1
+C_API_PREDICT_LEAF_INDEX = 2

-FIELD_TYPE_MAPPER = {"label":C_API_DTYPE_FLOAT32, 
-"weight":C_API_DTYPE_FLOAT32, 
-"init_score":C_API_DTYPE_FLOAT32,
-"group":C_API_DTYPE_INT32,
- }
+FIELD_TYPE_MAPPER = {"label": C_API_DTYPE_FLOAT32,
+                     "weight": C_API_DTYPE_FLOAT32,
+                     "init_score": C_API_DTYPE_FLOAT32,
+                     "group": C_API_DTYPE_INT32}

 def c_float_array(data):
    """Convert numpy array / list to c float array."""
@@ -144,7 +142,8 @@ def c_float_array(data):
            ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_double))
            type_data = C_API_DTYPE_FLOAT64
        else:
-            raise TypeError("expected np.float32 or np.float64, met type({})".format(data.dtype))
+            raise TypeError("expected np.float32 or np.float64, met type({})"
+                            .format(data.dtype))
    else:
        raise TypeError("Unknow type({})".format(type(data).__name__))
    return (ptr_data, type_data)
@@ -161,7 +160,8 @@ def c_int_array(data):
            ptr_data = data.ctypes.data_as(ctypes.POINTER(ctypes.c_int64))
            type_data = C_API_DTYPE_INT64
        else:
-            raise TypeError("expected np.int32 or np.int64, met type({})".format(data.dtype))
+            raise TypeError("expected np.int32 or np.int64, met type({})"
+                            .format(data.dtype))
    else:
        raise TypeError("Unknow type({})".format(type(data).__name__))
    return (ptr_data, type_data)
@@ -169,7 +169,7 @@ def c_int_array(data):
 class Predictor(object):
    """"A Predictor of LightGBM.
    """
-    def __init__(self,model_file=None, booster_handle=None, is_manage_handle=True):
+    def __init__(self, model_file=None, booster_handle=None, is_manage_handle=True):
        """Initialize the Predictor.

        Parameters
@@ -213,7 +213,9 @@ class Predictor(object):
            _safe_call(_LIB.LGBM_BoosterFree(self.handle))


-    def predict(self, data, num_iteration=-1, raw_score=False, pred_leaf=False, data_has_header=False, is_reshape=True):
+    def predict(self, data, num_iteration=-1,
+                raw_score=False, pred_leaf=False, data_has_header=False,
+                is_reshape=True):
        """
        Predict logic

@@ -222,7 +224,7 @@ class Predictor(object):
        data : string/numpy array/scipy.sparse
            Data source for prediction
            When data is string type, it represents the path of txt file,
-        num_iteration : 
+        num_iteration : int
            used iteration for prediction
        raw_score : bool
            True for predict raw score
@@ -238,7 +240,8 @@ class Predictor(object):
        Prediction result
        """
        if isinstance(data, Dataset):
-            raise TypeError("cannot use Dataset instance for prediction, please use raw data instead")
+            raise TypeError("cannot use Dataset instance for prediction, \
+                            please use raw data instead")
        predict_type = C_API_PREDICT_NORMAL
        if raw_score:
            predict_type = C_API_PREDICT_RAW_SCORE
@@ -256,7 +259,7 @@ class Predictor(object):
                predict_type,
                num_iteration,
                c_str(tmp_pred_fname)))
-            tmp_file = open(tmp_pred_fname,"r")
+            tmp_file = open(tmp_pred_fname, "r")
            lines = tmp_file.readlines()
            tmp_file.close()
            nrow = len(lines)
@@ -267,15 +270,19 @@ class Predictor(object):
            preds = np.array(preds, copy=False)
            os.remove(tmp_pred_fname)
        elif isinstance(data, scipy.sparse.csr_matrix):
-            preds, nrow = self.__pred_for_csr(data, num_iteration, predict_type)
+            preds, nrow = self.__pred_for_csr(data, num_iteration,
+                                              predict_type)
        elif isinstance(data, np.ndarray):
-            preds, nrow = self.__pred_for_np2d(data, num_iteration, predict_type)
+            preds, nrow = self.__pred_for_np2d(data, num_iteration,
+                                               predict_type)
        else:
            try:
                csr = scipy.sparse.csr_matrix(data)
-                preds, nrow = self.__pred_for_csr(csr, num_iteration, predict_type)
+                preds, nrow = self.__pred_for_csr(csr, num_iteration,
+                                                  predict_type)
            except:
-                raise TypeError('can not predict data for type {}'.format(type(data).__name__))
+                raise TypeError('can not predict data for type {}'.
+                                format(type(data).__name__))
        if pred_leaf:
            preds = preds.astype(np.int32)
        if preds.size != nrow and is_reshape:
@@ -283,7 +290,8 @@ class Predictor(object):
                ncol = int(preds.size / nrow)
                preds = preds.reshape(nrow, ncol)
            else:
-                raise ValueError('len of predict result(%d) cannot be divide nrow(%d)' %(preds.size, nrow) )
+                raise ValueError('len of predict result(%d) cannot be divide nrow (%d)'
+                                 % (preds.size, nrow))
        return preds

    def __get_num_preds(self, num_iteration, nrow, predict_type):
@@ -308,7 +316,8 @@ class Predictor(object):
            """change non-float data to float data, need to copy"""
            data = np.array(mat.reshape(mat.size), dtype=np.float32)
        ptr_data, type_ptr_data = c_float_array(data)
-        n_preds = self.__get_num_preds(num_iteration, mat.shape[0], predict_type)
+        n_preds = self.__get_num_preds(num_iteration, mat.shape[0],
+                                       predict_type)
        preds = np.zeros(n_preds, dtype=np.float32)
        out_num_preds = ctypes.c_int64(0)
        _safe_call(_LIB.LGBM_BoosterPredictForMat(
@@ -365,10 +374,10 @@ except ImportError:
    class DataFrame(object):
        pass

-PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int', 'int64': 'int',
-                       'uint8': 'int', 'uint16': 'int', 'uint32': 'int', 'uint64': 'int',
-                       'float16': 'float', 'float32': 'float', 'float64': 'float',
-                       'bool': 'i'}
+PANDAS_DTYPE_MAPPER = {'int8': 'int', 'int16': 'int', 'int32': 'int',
+                       'int64': 'int', 'uint8': 'int', 'uint16': 'int',
+                       'uint32': 'int', 'uint64': 'int', 'float16': 'float',
+                       'float32': 'float', 'float64': 'float', 'bool': 'i'}

 def _data_from_pandas(data):
    if isinstance(data, DataFrame):
@@ -1098,7 +1107,7 @@ class Booster(object):
        data : string/numpy array/scipy.sparse
            Data source for prediction
            When data is string type, it represents the path of txt file,
-        num_iteration : 
+        num_iteration : int
            used iteration for prediction
        raw_score : bool
            True for predict raw score
@@ -1181,7 +1190,7 @@ class Booster(object):
                ctypes.byref(tmp_out_len),
                data_ptr))
            if tmp_out_len.value != len(self.__inner_predict_buffer[data_idx]):
-                raise ValueError("incorrect number of predict results for data %d" %(data_idx) )
+                raise ValueError("incorrect number of predict results for data %d" % (data_idx) )
            self.__is_predicted_cur_iter[data_idx] = True
        return self.__inner_predict_buffer[data_idx]


--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -148,7 +148,6 @@ def early_stop(stopping_rounds, verbose=True):
    callback : function
        The requested callback function.
    """
-    state = {}
    factor_to_bigger_better = {}
    best_score = {}
    best_iter = {}
@@ -172,7 +171,6 @@ def early_stop(stopping_rounds, verbose=True):
            factor_to_bigger_better[i] = -1.0
            if env.evaluation_result_list[i][3]:
                factor_to_bigger_better[i] = 1.0
-        state['best_iter'] = 0

    def callback(env):
        """internal function"""
@@ -188,7 +186,6 @@ def early_stop(stopping_rounds, verbose=True):
                        '\t'.join([_format_eval_result(x) for x in env.evaluation_result_list]))
            else:
                if env.iteration - best_iter[i] >= stopping_rounds:
-                    state['best_iter'] = best_iter[i]
                    if env.model is not None:
                        env.model.set_attr(best_iteration=str(best_iter[i]))
                    if verbose:

--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
 """Training Library containing training routines of LightGBM."""
 from __future__ import absolute_import

-import collections
 import numpy as np
 from .basic import LightGBMError, Predictor, Dataset, Booster, is_str
 from . import callback

 def _construct_dataset(X_y, reference=None,
-    params=None, other_fields=None, predictor=None):
+                       params=None, other_fields=None, 
+                       predictor=None):
    if 'max_bin' in params:
        max_bin = int(params['max_bin'])
    else:
@@ -31,10 +31,12 @@ def _construct_dataset(X_y, reference=None,
        label = X_y[1]
    if reference is None:
        ret = Dataset(data, label=label, max_bin=max_bin, 
-        weight=weight, group=group, predictor=predictor, params=params)
+                      weight=weight, group=group,
+                      predictor=predictor, params=params)

    else:
-        ret = reference.create_valid(data, label=label, weight=weight, group=group, params=params)
+        ret = reference.create_valid(data, label=label, weight=weight,
+                                     group=group, params=params)
    if init_score is not None:
        ret.set_init_score(init_score)
    return ret
@@ -409,6 +411,6 @@ def cv(params, train_data, num_boost_round=10, nfold=5, stratified=False,
                                        evaluation_result_list=res))
        except callback.EarlyStopException as e:
            for k in results.keys():
-                results[k] = results[k][:(e.state['best_iter'] + 1)]
+                results[k] = results[k][:(e.best_iteration + 1)]
            break
    return results
--- a/python-package/lightgbm/libpath.py
+++ b/python-package/lightgbm/libpath.py
--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -194,7 +194,8 @@ class LGBMModel(LGBMModelBase):
        return params

    def fit(self, X, y, eval_set=None, eval_metric=None,
-            early_stopping_rounds=None, verbose=True, train_fields=None, valid_fields=None, other_params=None):
+            early_stopping_rounds=None, verbose=True,
+            train_fields=None, valid_fields=None, other_params=None):
        """
        Fit the gradient boosting model

@@ -328,8 +329,10 @@ class LGBMClassifier(LGBMModel, LGBMClassifierBase):
        if eval_set is not None:
            eval_set = list( (x[0], self._le.transform(x[1])) for x in eval_set )

-        super(LGBMClassifier, self).fit(X, training_labels, eval_set, eval_metric, 
-            early_stopping_rounds, verbose, train_fields, valid_fields, other_params)
+        super(LGBMClassifier, self).fit(X, training_labels, eval_set,
+                                        eval_metric, early_stopping_rounds,
+                                        verbose, train_fields, valid_fields,
+                                        other_params)
        return self

    def predict(self, data, raw_score=False, num_iteration=0):
@@ -429,5 +432,7 @@ class LGBMRanker(LGBMModel):
            self.fobj = None

        super(LGBMRanker, self).fit(X, y, eval_set, eval_metric,
-            early_stopping_rounds, verbose, train_fields, valid_fields, other_params)
+                                    early_stopping_rounds, verbose,
+                                    train_fields, valid_fields,
+                                    other_params)
        return self