[python] break extremely large lines and fix classname (#1698)

* break extremely large lines in basic.py
* break extremely large lines in callback.py
* break extremely large lines in engine.py
* break extremely large lines in sklearn.py
* hotfixes

[python] break extremely large lines and fix classname (#1698)
* break extremely large lines in basic.py
* break extremely large lines in callback.py
* break extremely large lines in engine.py
* break extremely large lines in sklearn.py
* hotfixes
7825084f · Nikita Titov · Guolin Ke · a760eae4 · 7825084f · 7825084f
Commit 7825084f authored Sep 25, 2018 by Nikita Titov Committed by Guolin Ke Sep 25, 2018
4 changed files
--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -134,7 +134,7 @@ def param_dict_to_str(data):
    return ' '.join(pairs)


-class _temp_file(object):
+class _TempFile(object):
    def __enter__(self):
        with NamedTemporaryFile(prefix="lightgbm_tmp_", delete=True) as f:
            self.name = f.name
@@ -192,7 +192,8 @@ def convert_from_sliced_object(data):
    """fix the memory of multi-dimensional sliced object"""
    if data.base is not None and isinstance(data, np.ndarray) and isinstance(data.base, np.ndarray):
        if not data.flags.c_contiguous:
-            warnings.warn("Usage of np.ndarray subset (sliced data) is not recommended due to it will double the peak memory cost in LightGBM.")
+            warnings.warn("Usage of np.ndarray subset (sliced data) is not recommended "
+                          "due to it will double the peak memory cost in LightGBM.")
            return np.copy(data)
    return data

@@ -271,7 +272,8 @@ def _data_from_pandas(data, feature_name, categorical_feature, pandas_categorica
            bad_fields = [data.columns[i] for i, dtype in
                          enumerate(data_dtypes) if dtype.name not in PANDAS_DTYPE_MAPPER]

-            msg = """DataFrame.dtypes for data must be int, float or bool. Did not expect the data types in fields """
+            msg = ("DataFrame.dtypes for data must be int, float or bool.\n"
+                   "Did not expect the data types in fields ")
            raise ValueError(msg + ', '.join(bad_fields))
        data = data.values.astype('float')
    else:
@@ -295,7 +297,8 @@ def _label_from_pandas(label):

 def _save_pandas_categorical(file_name, pandas_categorical):
    with open(file_name, 'a') as f:
-        f.write('\npandas_categorical:' + json.dumps(pandas_categorical, default=json_default_with_numpy) + '\n')
+        f.write('\npandas_categorical:'
+                + json.dumps(pandas_categorical, default=json_default_with_numpy) + '\n')


 def _load_pandas_categorical(file_name):
@@ -418,7 +421,7 @@ class _InnerPredictor(object):
            num_iteration = self.num_total_iteration

        if isinstance(data, string_type):
-            with _temp_file() as f:
+            with _TempFile() as f:
                _safe_call(_LIB.LGBM_BoosterPredictForFile(
                    self.handle,
                    c_str(data),
@@ -521,7 +524,8 @@ class _InnerPredictor(object):
            n_preds = [self.__get_num_preds(num_iteration, i, predict_type) for i in np.diff([0] + list(sections) + [nrow])]
            n_preds_sections = np.array([0] + n_preds, dtype=np.intp).cumsum()
            preds = np.zeros(sum(n_preds), dtype=np.float64)
-            for chunk, (start_idx_pred, end_idx_pred) in zip_(np.array_split(mat, sections), zip_(n_preds_sections, n_preds_sections[1:])):
+            for chunk, (start_idx_pred, end_idx_pred) in zip_(np.array_split(mat, sections),
+                                                              zip_(n_preds_sections, n_preds_sections[1:])):
                # avoid memory consumption by arrays concatenation operations
                inner_predict(chunk, num_iteration, predict_type, preds[start_idx_pred:end_idx_pred])
            return preds, nrow
@@ -692,16 +696,22 @@ class Dataset(object):
        if reference is not None:
            self.pandas_categorical = reference.pandas_categorical
            categorical_feature = reference.categorical_feature
-        data, feature_name, categorical_feature, self.pandas_categorical = _data_from_pandas(data, feature_name, categorical_feature, self.pandas_categorical)
+        data, feature_name, categorical_feature, self.pandas_categorical = _data_from_pandas(data,
+                                                                                             feature_name,
+                                                                                             categorical_feature,
+                                                                                             self.pandas_categorical)
        label = _label_from_pandas(label)
        self.data_has_header = False
        # process for args
        params = {} if params is None else params
-        args_names = getattr(self.__class__, '_lazy_init').__code__.co_varnames[:getattr(self.__class__, '_lazy_init').__code__.co_argcount]
+        args_names = (getattr(self.__class__, '_lazy_init')
+                      .__code__
+                      .co_varnames[:getattr(self.__class__, '_lazy_init').__code__.co_argcount])
        for key, _ in params.items():
            if key in args_names:
-                warnings.warn('{0} keyword has been found in `params` and will be ignored. '
-                              'Please use {0} argument of the Dataset constructor to pass this parameter.'.format(key))
+                warnings.warn('{0} keyword has been found in `params` and will be ignored.\n'
+                              'Please use {0} argument of the Dataset constructor to pass this parameter.'
+                              .format(key))
        self.predictor = predictor
        # user can set verbose with params, it has higher priority
        if not any(verbose_alias in params for verbose_alias in ('verbose', 'verbosity')) and silent:
@@ -930,7 +940,8 @@ class Dataset(object):
                if self.used_indices is None:
                    # create valid
                    self._lazy_init(self.data, label=self.label, reference=self.reference,
-                                    weight=self.weight, group=self.group, init_score=self.init_score, predictor=self._predictor,
+                                    weight=self.weight, group=self.group,
+                                    init_score=self.init_score, predictor=self._predictor,
                                    silent=self.silent, feature_name=self.feature_name, params=self.params)
                else:
                    # construct subset
@@ -938,7 +949,8 @@ class Dataset(object):
                    assert used_indices.flags.c_contiguous
                    if self.reference.group is not None:
                        group_info = np.array(self.reference.group).astype(int)
-                        _, self.group = np.unique(np.repeat(range_(len(group_info)), repeats=group_info)[self.used_indices], return_counts=True)
+                        _, self.group = np.unique(np.repeat(range_(len(group_info)), repeats=group_info)[self.used_indices],
+                                                  return_counts=True)
                    self.handle = ctypes.c_void_p()
                    params_str = param_dict_to_str(self.params)
                    _safe_call(_LIB.LGBM_DatasetGetSubset(
@@ -954,8 +966,9 @@ class Dataset(object):
            else:
                # create train
                self._lazy_init(self.data, label=self.label,
-                                weight=self.weight, group=self.group, init_score=self.init_score,
-                                predictor=self._predictor, silent=self.silent, feature_name=self.feature_name,
+                                weight=self.weight, group=self.group,
+                                init_score=self.init_score, predictor=self._predictor,
+                                silent=self.silent, feature_name=self.feature_name,
                                categorical_feature=self.categorical_feature, params=self.params)
            if self.free_raw_data:
                self.data = None
@@ -1158,11 +1171,13 @@ class Dataset(object):
                warnings.warn('Using categorical_feature in Dataset.')
                return self
            else:
-                warnings.warn('categorical_feature in Dataset is overridden. New categorical_feature is {}'.format(sorted(list(categorical_feature))))
+                warnings.warn('categorical_feature in Dataset is overridden.\n'
+                              'New categorical_feature is {}'.format(sorted(list(categorical_feature))))
                self.categorical_feature = categorical_feature
                return self._free_handle()
        else:
-            raise LightGBMError("Cannot set categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
+            raise LightGBMError("Cannot set categorical feature after freed raw data, "
+                                "set free_raw_data=False when construct Dataset to avoid this.")

    def _set_predictor(self, predictor):
        """
@@ -1175,7 +1190,8 @@ class Dataset(object):
            self._predictor = predictor
            return self._free_handle()
        else:
-            raise LightGBMError("Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
+            raise LightGBMError("Cannot set predictor after freed raw data, "
+                                "set free_raw_data=False when construct Dataset to avoid this.")

    def set_reference(self, reference):
        """Set reference Dataset.
@@ -1190,7 +1206,9 @@ class Dataset(object):
        self : Dataset
            Dataset with set reference.
        """
-        self.set_categorical_feature(reference.categorical_feature).set_feature_name(reference.feature_name)._set_predictor(reference._predictor)
+        self.set_categorical_feature(reference.categorical_feature) \
+            .set_feature_name(reference.feature_name) \
+            ._set_predictor(reference._predictor)
        # we're done if self and reference share a common upstrem reference
        if self.get_ref_chain().intersection(reference.get_ref_chain()):
            return self
@@ -1198,7 +1216,8 @@ class Dataset(object):
            self.reference = reference
            return self._free_handle()
        else:
-            raise LightGBMError("Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
+            raise LightGBMError("Cannot set reference after freed raw data, "
+                                "set free_raw_data=False when construct Dataset to avoid this.")

    def set_feature_name(self, feature_name):
        """Set feature name.
@@ -1217,7 +1236,8 @@ class Dataset(object):
            self.feature_name = feature_name
        if self.handle is not None and feature_name is not None and feature_name != 'auto':
            if len(feature_name) != self.num_feature():
-                raise ValueError("Length of feature_name({}) and num_feature({}) don't match".format(len(feature_name), self.num_feature()))
+                raise ValueError("Length of feature_name({}) and num_feature({}) don't match"
+                                 .format(len(feature_name), self.num_feature()))
            c_feature_name = [c_str(name) for name in feature_name]
            _safe_call(_LIB.LGBM_DatasetSetFeatureNames(
                self.handle,
@@ -1445,7 +1465,8 @@ class Booster(object):
        if train_set is not None:
            # Training task
            if not isinstance(train_set, Dataset):
-                raise TypeError('Training data should be Dataset instance, met {}'.format(type(train_set).__name__))
+                raise TypeError('Training data should be Dataset instance, met {}'
+                                .format(type(train_set).__name__))
            params_str = param_dict_to_str(params)
            # construct booster object
            self.handle = ctypes.c_void_p()
@@ -1640,9 +1661,11 @@ class Booster(object):
            Booster with set validation data.
        """
        if not isinstance(data, Dataset):
-            raise TypeError('Validation data should be Dataset instance, met {}'.format(type(data).__name__))
+            raise TypeError('Validation data should be Dataset instance, met {}'
+                            .format(type(data).__name__))
        if data._predictor is not self.__init_predictor:
-            raise LightGBMError("Add validation data failed, you should use same predictor for these data")
+            raise LightGBMError("Add validation data failed, "
+                                "you should use same predictor for these data")
        _safe_call(_LIB.LGBM_BoosterAddValidData(
            self.handle,
            data.construct().handle))
@@ -1700,9 +1723,11 @@ class Booster(object):
        # need reset training data
        if train_set is not None and train_set is not self.train_set:
            if not isinstance(train_set, Dataset):
-                raise TypeError('Training data should be Dataset instance, met {}'.format(type(train_set).__name__))
+                raise TypeError('Training data should be Dataset instance, met {}'
+                                .format(type(train_set).__name__))
            if train_set._predictor is not self.__init_predictor:
-                raise LightGBMError("Replace training data failed, you should use same predictor for these data")
+                raise LightGBMError("Replace training data failed, "
+                                    "you should use same predictor for these data")
            self.train_set = train_set
            _safe_call(_LIB.LGBM_BoosterResetTrainingData(
                self.handle,
@@ -1748,7 +1773,8 @@ class Booster(object):
        assert grad.flags.c_contiguous
        assert hess.flags.c_contiguous
        if len(grad) != len(hess):
-            raise ValueError("Lengths of gradient({}) and hessian({}) don't match".format(len(grad), len(hess)))
+            raise ValueError("Lengths of gradient({}) and hessian({}) don't match"
+                             .format(len(grad), len(hess)))
        is_finished = ctypes.c_int(0)
        _safe_call(_LIB.LGBM_BoosterUpdateOneIterCustom(
            self.handle,
@@ -2051,7 +2077,8 @@ class Booster(object):
                ptr_string_buffer))
        return json.loads(string_buffer.value.decode())

-    def predict(self, data, num_iteration=None, raw_score=False, pred_leaf=False, pred_contrib=False,
+    def predict(self, data, num_iteration=None,
+                raw_score=False, pred_leaf=False, pred_contrib=False,
                data_has_header=False, is_reshape=True, pred_parameter=None, **kwargs):
        """Make a prediction.

@@ -2064,7 +2091,6 @@ class Booster(object):
            Limit number of iterations in the prediction.
            If None, if the best iteration exists, it is used; otherwise, all iterations are used.
            If <= 0, all iterations are used (no limits).
-
        raw_score : bool, optional (default=False)
            Whether to predict raw scores.
        pred_leaf : bool, optional (default=False)
@@ -2093,7 +2119,9 @@ class Booster(object):
        predictor = self._to_predictor(kwargs)
        if num_iteration is None:
            num_iteration = self.best_iteration
-        return predictor.predict(data, num_iteration, raw_score, pred_leaf, pred_contrib, data_has_header, is_reshape)
+        return predictor.predict(data, num_iteration,
+                                 raw_score, pred_leaf, pred_contrib,
+                                 data_has_header, is_reshape)

    def refit(self, data, label, decay_rate=0.9, **kwargs):
        """Refit the existing Booster by new data.
@@ -2106,7 +2134,8 @@ class Booster(object):
        label : list, numpy 1-D array or pandas one-column DataFrame/Series
            Label for refit.
        decay_rate : float, optional (default=0.9)
-            Decay rate of refit, will use ``leaf_output = decay_rate * old_leaf_output + (1.0 - decay_rate) * new_leaf_output`` to refit trees.
+            Decay rate of refit,
+            will use ``leaf_output = decay_rate * old_leaf_output + (1.0 - decay_rate) * new_leaf_output`` to refit trees.
        **kwargs : other parameters for refit
            These parameters will be passed to ``predict`` method.

@@ -2248,7 +2277,8 @@ class Booster(object):
            if tmp_out_len.value != self.__num_inner_eval:
                raise ValueError("Wrong length of eval results")
            for i in range_(self.__num_inner_eval):
-                ret.append((data_name, self.__name_inner_eval[i], result[i], self.__higher_better_inner_eval[i]))
+                ret.append((data_name, self.__name_inner_eval[i],
+                            result[i], self.__higher_better_inner_eval[i]))
        if feval is not None:
            if data_idx == 0:
                cur_data = self.train_set

--- a/python-package/lightgbm/callback.py
+++ b/python-package/lightgbm/callback.py
@@ -133,7 +133,8 @@ def reset_parameter(**kwargs):
                raise RuntimeError("cannot reset {} during training".format(repr(key)))
            if isinstance(value, list):
                if len(value) != env.end_iteration - env.begin_iteration:
-                    raise ValueError("Length of list {} has to equal to 'num_boost_round'.".format(repr(key)))
+                    raise ValueError("Length of list {} has to equal to 'num_boost_round'."
+                                     .format(repr(key)))
                new_param = value[env.iteration - env.begin_iteration]
            else:
                new_param = value(env.iteration - env.begin_iteration)
@@ -180,7 +181,8 @@ def early_stopping(stopping_rounds, verbose=True):
    def init(env):
        """internal function"""
        if not env.evaluation_result_list:
-            raise ValueError('For early stopping, at least one dataset and eval metric is required for evaluation')
+            raise ValueError('For early stopping, '
+                             'at least one dataset and eval metric is required for evaluation')

        if verbose:
            msg = "Training until validation scores don't improve for {} rounds."

--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -129,7 +129,10 @@ def train(params, train_set, num_boost_round=100,
    if not isinstance(train_set, Dataset):
        raise TypeError("Training only accepts Dataset object")

-    train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature(categorical_feature)
+    train_set._update_params(params) \
+             ._set_predictor(predictor) \
+             .set_feature_name(feature_name) \
+             .set_categorical_feature(categorical_feature)

    is_valid_contain_train = False
    train_data_name = "training"
@@ -341,7 +344,7 @@ def cv(params, train_set, num_boost_round=100,
        Data to be trained on.
    num_boost_round : int, optional (default=100)
        Number of boosting iterations.
-    folds : a generator or iterator of (train_idx, test_idx) tuples, scikit-learn splitter object or None, optional (default=None)
+    folds : generator or iterator of (train_idx, test_idx) tuples, scikit-learn splitter object or None, optional (default=None)
        If generator or iterator, it should yield the train and test indices for the each fold.
        If object, it should be one of the scikit-learn splitter classes
        (http://scikit-learn.org/stable/modules/classes.html#splitter-classes)
@@ -434,7 +437,10 @@ def cv(params, train_set, num_boost_round=100,
        predictor = init_model._to_predictor(dict(init_model.params, **params))
    else:
        predictor = None
-    train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature(categorical_feature)
+    train_set._update_params(params) \
+             ._set_predictor(predictor) \
+             .set_feature_name(feature_name) \
+             .set_categorical_feature(categorical_feature)

    if metrics is not None:
        params['metric'] = metrics

--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -469,7 +469,8 @@ class LGBMModel(_LGBMModelBase):
                elif isinstance(collection, dict):
                    return collection.get(i, None)
                else:
-                    raise TypeError('eval_sample_weight, eval_class_weight, eval_init_score, and eval_group should be dict or list')
+                    raise TypeError('eval_sample_weight, eval_class_weight, eval_init_score, and eval_group '
+                                    'should be dict or list')

            if isinstance(eval_set, tuple):
                eval_set = [eval_set]
@@ -480,14 +481,16 @@ class LGBMModel(_LGBMModelBase):
                else:
                    valid_weight = _get_meta_data(eval_sample_weight, i)
                    if _get_meta_data(eval_class_weight, i) is not None:
-                        valid_class_sample_weight = _LGBMComputeSampleWeight(_get_meta_data(eval_class_weight, i), valid_data[1])
+                        valid_class_sample_weight = _LGBMComputeSampleWeight(_get_meta_data(eval_class_weight, i),
+                                                                             valid_data[1])
                        if valid_weight is None or len(valid_weight) == 0:
                            valid_weight = valid_class_sample_weight
                        else:
                            valid_weight = np.multiply(valid_weight, valid_class_sample_weight)
                    valid_init_score = _get_meta_data(eval_init_score, i)
                    valid_group = _get_meta_data(eval_group, i)
-                    valid_set = _construct_dataset(valid_data[0], valid_data[1], valid_weight, valid_init_score, valid_group, params)
+                    valid_set = _construct_dataset(valid_data[0], valid_data[1],
+                                                   valid_weight, valid_init_score, valid_group, params)
                valid_sets.append(valid_set)

        self._Booster = train(params, train_set,
@@ -786,8 +789,10 @@ class LGBMRanker(LGBMModel):
                raise ValueError("Eval_group cannot be None when eval_set is not None")
            elif len(eval_group) != len(eval_set):
                raise ValueError("Length of eval_group should be equal to eval_set")
-            elif (isinstance(eval_group, dict) and any(i not in eval_group or eval_group[i] is None for i in range_(len(eval_group)))) \
-                    or (isinstance(eval_group, list) and any(group is None for group in eval_group)):
+            elif (isinstance(eval_group, dict)
+                  and any(i not in eval_group or eval_group[i] is None for i in range_(len(eval_group)))
+                  or isinstance(eval_group, list)
+                  and any(group is None for group in eval_group)):
                raise ValueError("Should set group for all eval datasets for ranking task; "
                                 "if you use dict, the index should start from 0")