return self (#1602)

244db078 · Nikita Titov · Qiwei Ye · dcf9ad2e · 244db078 · 244db078
Commit 244db078, authored Aug 25, 2018 by Nikita Titov; committed by Qiwei Ye on Aug 25, 2018
4 changed files
--- a/examples/python-guide/advanced_example.py
+++ b/examples/python-guide/advanced_example.py
@@ -64,7 +64,7 @@ print('7th feature name is:', repr(lgb_train.feature_name[6]))
 # save model to file
 gbm.save_model('model.txt')
-# dump model to json (and save to file)
+# dump model to JSON (and save to file)
 print('Dump model to JSON...')
 model_json = gbm.dump_model()

--- a/python-package/lightgbm/basic.py
+++ b/python-package/lightgbm/basic.py
@@ -184,7 +184,7 @@ def convert_from_sliced_object(data):
    """fix the memory of multi-dimensional sliced object"""
    if data.base is not None and isinstance(data, np.ndarray) and isinstance(data.base, np.ndarray):
        if not data.flags.c_contiguous:
-            warnings.warn("Usage subset(sliced data) of np.ndarray is not recommended due to it will double the peak memory cost in LightGBM.")
+            warnings.warn("Usage of np.ndarray subset (sliced data) is not recommended due to it will double the peak memory cost in LightGBM.")
            return np.copy(data)
    return data
@@ -607,9 +607,9 @@ class Dataset(object):
            If 'auto' and data is pandas DataFrame, pandas categorical columns are used.
            All values in categorical features should be less than int32 max value (2147483647).
            All negative values in categorical features will be treated as missing values.
-        params: dict or None, optional (default=None)
+        params : dict or None, optional (default=None)
            Other parameters.
-        free_raw_data: bool, optional (default=True)
+        free_raw_data : bool, optional (default=True)
            If True, raw data is freed after constructing inner Dataset.
        """
        self.handle = None
@@ -639,6 +639,7 @@ class Dataset(object):
        if self.handle is not None:
            _safe_call(_LIB.LGBM_DatasetFree(self.handle))
            self.handle = None
+        return self
    def _lazy_init(self, data, label=None, reference=None,
                   weight=None, group=None, init_score=None, predictor=None,
@@ -646,7 +647,7 @@ class Dataset(object):
                   categorical_feature='auto', params=None):
        if data is None:
            self.handle = None
-            return
+            return self
        if reference is not None:
            self.pandas_categorical = reference.pandas_categorical
            categorical_feature = reference.categorical_feature
@@ -747,7 +748,7 @@ class Dataset(object):
        elif self.predictor is not None:
            raise TypeError('wrong predictor type {}'.format(type(self.predictor).__name__))
        # set feature names
-        self.set_feature_name(feature_name)
+        return self.set_feature_name(feature_name)
    def __init_from_np2d(self, mat, params_str, ref_dataset):
        """
@@ -773,6 +774,7 @@ class Dataset(object):
            c_str(params_str),
            ref_dataset,
            ctypes.byref(self.handle)))
+        return self
    def __init_from_list_np2d(self, mats, params_str, ref_dataset):
        """
@@ -821,6 +823,7 @@ class Dataset(object):
            c_str(params_str),
            ref_dataset,
            ctypes.byref(self.handle)))
+        return self
    def __init_from_csr(self, csr, params_str, ref_dataset):
        """
@@ -845,6 +848,7 @@ class Dataset(object):
            c_str(params_str),
            ref_dataset,
            ctypes.byref(self.handle)))
+        return self
    def __init_from_csc(self, csc, params_str, ref_dataset):
        """
@@ -869,6 +873,7 @@ class Dataset(object):
            c_str(params_str),
            ref_dataset,
            ctypes.byref(self.handle)))
+        return self
    def construct(self):
        """Lazy init.
@@ -876,7 +881,7 @@ class Dataset(object):
        Returns
        -------
        self : Dataset
-            Returns self.
+            Constructed Dataset object.
        """
        if self.handle is None:
            if self.reference is not None:
@@ -928,13 +933,13 @@ class Dataset(object):
            Init score for Dataset.
        silent : bool, optional (default=False)
            Whether to print messages during construction.
-        params: dict or None, optional (default=None)
+        params : dict or None, optional (default=None)
            Other parameters.
        Returns
        -------
-        self : Dataset
+        valid : Dataset
-            Returns self.
+            Validation Dataset with reference to self.
        """
        ret = Dataset(data, label=label, reference=self,
                      weight=weight, group=group, init_score=init_score,
@@ -950,7 +955,7 @@ class Dataset(object):
        ----------
        used_indices : list of int
            Indices used to create the subset.
-        params: dict or None, optional (default=None)
+        params : dict or None, optional (default=None)
            Other parameters.
        Returns
@@ -974,10 +979,16 @@ class Dataset(object):
        ----------
        filename : string
            Name of the output file.
+        Returns
+        -------
+        self : Dataset
+            Returns self.
        """
        _safe_call(_LIB.LGBM_DatasetSaveBinary(
            self.construct().handle,
            c_str(filename)))
+        return self
    def _update_params(self, params):
        if not self.params:
@@ -985,20 +996,27 @@ class Dataset(object):
        else:
            self.params_back_up = copy.deepcopy(self.params)
            self.params.update(params)
+        return self
    def _reverse_update_params(self):
        self.params = copy.deepcopy(self.params_back_up)
        self.params_back_up = None
+        return self
    def set_field(self, field_name, data):
        """Set property into the Dataset.
        Parameters
        ----------
-        field_name: string
+        field_name : string
            The field name of the information.
-        data: list, numpy array or None
+        data : list, numpy array or None
            The array of data to be set.
+        Returns
+        -------
+        self : Dataset
+            Dataset with set property.
        """
        if self.handle is None:
            raise Exception("Cannot set %s before construct dataset" % field_name)
@@ -1010,7 +1028,7 @@ class Dataset(object):
                None,
                ctypes.c_int(0),
                ctypes.c_int(FIELD_TYPE_MAPPER[field_name])))
-            return
+            return self
        dtype = np.float32
        if field_name == 'group':
            dtype = np.int32
@@ -1031,13 +1049,14 @@ class Dataset(object):
            ptr_data,
            ctypes.c_int(len(data)),
            ctypes.c_int(type_data)))
+        return self
    def get_field(self, field_name):
        """Get property from the Dataset.
        Parameters
        ----------
-        field_name: string
+        field_name : string
            The field name of the information.
        Returns
@@ -1076,19 +1095,25 @@ class Dataset(object):
        ----------
        categorical_feature : list of int or strings
            Names or indices of categorical features.
+        Returns
+        -------
+        self : Dataset
+            Dataset with set categorical features.
        """
        if self.categorical_feature == categorical_feature:
-            return
+            return self
        if self.data is not None:
            if self.categorical_feature is None:
                self.categorical_feature = categorical_feature
-                self._free_handle()
+                return self._free_handle()
            elif categorical_feature == 'auto':
                warnings.warn('Using categorical_feature in Dataset.')
+                return self
            else:
                warnings.warn('categorical_feature in Dataset is overridden. New categorical_feature is {}'.format(sorted(list(categorical_feature))))
                self.categorical_feature = categorical_feature
-                self._free_handle()
+                return self._free_handle()
        else:
            raise LightGBMError("Cannot set categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
@@ -1098,10 +1123,10 @@ class Dataset(object):
        Please set init_model in engine.train or engine.cv
        """
        if predictor is self._predictor:
-            return
+            return self
        if self.data is not None:
            self._predictor = predictor
-            self._free_handle()
+            return self._free_handle()
        else:
            raise LightGBMError("Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
@@ -1112,16 +1137,19 @@ class Dataset(object):
        ----------
        reference : Dataset
            Reference that is used as a template to consturct the current Dataset.
+        Returns
+        -------
+        self : Dataset
+            Dataset with set reference.
        """
-        self.set_categorical_feature(reference.categorical_feature)
+        self.set_categorical_feature(reference.categorical_feature).set_feature_name(reference.feature_name)._set_predictor(reference._predictor)
-        self.set_feature_name(reference.feature_name)
-        self._set_predictor(reference._predictor)
        # we're done if self and reference share a common upstrem reference
        if self.get_ref_chain().intersection(reference.get_ref_chain()):
-            return
+            return self
        if self.data is not None:
            self.reference = reference
-            self._free_handle()
+            return self._free_handle()
        else:
            raise LightGBMError("Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
@@ -1132,6 +1160,11 @@ class Dataset(object):
        ----------
        feature_name : list of strings
            Feature names.
+        Returns
+        -------
+        self : Dataset
+            Dataset with set feature name.
        """
        if feature_name != 'auto':
            self.feature_name = feature_name
@@ -1143,19 +1176,26 @@ class Dataset(object):
                self.handle,
                c_array(ctypes.c_char_p, c_feature_name),
                ctypes.c_int(len(feature_name))))
+        return self
    def set_label(self, label):
        """Set label of Dataset
        Parameters
        ----------
-        label: list, numpy array or None
+        label : list, numpy array or None
            The label information to be set into Dataset.
+        Returns
+        -------
+        self : Dataset
+            Dataset with set label.
        """
        self.label = label
        if self.handle is not None:
            label = list_to_1d_numpy(label, name='label')
            self.set_field('label', label)
+        return self
    def set_weight(self, weight):
        """Set weight of each instance.
@@ -1164,6 +1204,11 @@ class Dataset(object):
        ----------
        weight : list, numpy array or None
            Weight to be set for each data point.
+        Returns
+        -------
+        self : Dataset
+            Dataset with set weight.
        """
        if weight is not None and np.all(weight == 1):
            weight = None
@@ -1171,6 +1216,7 @@ class Dataset(object):
        if self.handle is not None and weight is not None:
            weight = list_to_1d_numpy(weight, name='weight')
            self.set_field('weight', weight)
+        return self
    def set_init_score(self, init_score):
        """Set init score of Booster to start from.
@@ -1179,11 +1225,17 @@ class Dataset(object):
        ----------
        init_score : list, numpy array or None
            Init score for Booster.
+        Returns
+        -------
+        self : Dataset
+            Dataset with set init score.
        """
        self.init_score = init_score
        if self.handle is not None and init_score is not None:
            init_score = list_to_1d_numpy(init_score, np.float64, name='init_score')
            self.set_field('init_score', init_score)
+        return self
    def set_group(self, group):
        """Set group size of Dataset (used for ranking).
@@ -1192,18 +1244,24 @@ class Dataset(object):
        ----------
        group : list, numpy array or None
            Group size of each group.
+        Returns
+        -------
+        self : Dataset
+            Dataset with set group.
        """
        self.group = group
        if self.handle is not None and group is not None:
            group = list_to_1d_numpy(group, np.int32, name='group')
            self.set_field('group', group)
+        return self
    def get_label(self):
        """Get the label of the Dataset.
        Returns
        -------
-        label : numpy array
+        label : numpy array or None
            The label information from the Dataset.
        """
        if self.label is None:
@@ -1215,7 +1273,7 @@ class Dataset(object):
        Returns
        -------
-        weight : numpy array
+        weight : numpy array or None
            Weight for each data point from the Dataset.
        """
        if self.weight is None:
@@ -1227,7 +1285,7 @@ class Dataset(object):
        Returns
        -------
-        init_score : numpy array
+        init_score : numpy array or None
            Init score of Booster.
        """
        if self.init_score is None:
@@ -1239,17 +1297,14 @@ class Dataset(object):
        Returns
        -------
-        group : numpy array
+        group : numpy array or None
            Group size of each group.
        """
        if self.group is None:
            self.group = self.get_field('group')
            if self.group is not None:
                # group data from LightGBM is boundaries data, need to convert to group size
-                new_group = []
+                self.group = np.diff(self.group)
-                for i in range_(len(self.group) - 1):
-                    new_group.append(self.group[i + 1] - self.group[i])
-                self.group = new_group
        return self.group
    def num_data(self):
@@ -1309,7 +1364,7 @@ class Dataset(object):
                    break
            else:
                break
-        return(ref_chain)
+        return ref_chain
 class Booster(object):
@@ -1319,7 +1374,7 @@ class Booster(object):
        Parameters
        ----------
-        params: dict or None, optional (default=None)
+        params : dict or None, optional (default=None)
            Parameters for Booster.
        train_set : Dataset or None, optional (default=None)
            Training dataset.
@@ -1448,14 +1503,22 @@ class Booster(object):
        self.__dict__.update(state)
    def free_dataset(self):
-        """Free Booster's Datasets."""
+        """Free Booster's Datasets.
+        Returns
+        -------
+        self : Booster
+            Booster without Datasets.
+        """
        self.__dict__.pop('train_set', None)
        self.__dict__.pop('valid_sets', None)
        self.__num_dataset = 0
+        return self
    def _free_buffer(self):
        self.__inner_predict_buffer = []
        self.__is_predicted_cur_iter = []
+        return self
    def set_network(self, machines, local_listen_port=12400,
                    listen_time_out=120, num_machines=1):
@@ -1463,35 +1526,54 @@ class Booster(object):
        Parameters
        ----------
-        machines: list, set or string
+        machines : list, set or string
            Names of machines.
-        local_listen_port: int, optional (default=12400)
+        local_listen_port : int, optional (default=12400)
            TCP listen port for local machines.
-        listen_time_out: int, optional (default=120)
+        listen_time_out : int, optional (default=120)
            Socket time-out in minutes.
-        num_machines: int, optional (default=1)
+        num_machines : int, optional (default=1)
            The number of machines for parallel learning application.
+        Returns
+        -------
+        self : Booster
+            Booster with set network.
        """
        _safe_call(_LIB.LGBM_NetworkInit(c_str(machines),
                                         ctypes.c_int(local_listen_port),
                                         ctypes.c_int(listen_time_out),
                                         ctypes.c_int(num_machines)))
        self.network = True
+        return self
    def free_network(self):
-        """Free network."""
+        """Free Booster's network.
+        Returns
+        -------
+        self : Booster
+            Booster with freed network.
+        """
        _safe_call(_LIB.LGBM_NetworkFree())
        self.network = False
+        return self
    def set_train_data_name(self, name):
        """Set the name to the training Dataset.
        Parameters
        ----------
-        name: string
+        name : string
-            Name for training Dataset.
+            Name for the training Dataset.
+        Returns
+        -------
+        self : Booster
+            Booster with set training Dataset name.
        """
        self.__train_data_name = name
+        return self
    def add_valid(self, data, name):
        """Add validation data.
@@ -1502,6 +1584,11 @@ class Booster(object):
            Validation data.
        name : string
            Name of validation data.
+        Returns
+        -------
+        self : Booster
+            Booster with set validation data.
        """
        if not isinstance(data, Dataset):
            raise TypeError('Validation data should be Dataset instance, met {}'.format(type(data).__name__))
@@ -1515,6 +1602,7 @@ class Booster(object):
        self.__num_dataset += 1
        self.__inner_predict_buffer.append(None)
        self.__is_predicted_cur_iter.append(False)
+        return self
    def reset_parameter(self, params):
        """Reset parameters of Booster.
@@ -1523,6 +1611,11 @@ class Booster(object):
        ----------
        params : dict
            New parameters for Booster.
+        Returns
+        -------
+        self : Booster
+            Booster with new parameters.
        """
        if any(metric_alias in params for metric_alias in ('metric', 'metrics', 'metric_types')):
            self.__need_reload_eval_info = True
@@ -1531,9 +1624,10 @@ class Booster(object):
            _safe_call(_LIB.LGBM_BoosterResetParameter(
                self.handle,
                c_str(params_str)))
+        return self
    def update(self, train_set=None, fobj=None):
-        """Update for one iteration.
+        """Update Booster for one iteration.
        Parameters
        ----------
@@ -1575,28 +1669,29 @@ class Booster(object):
            return is_finished.value == 1
        else:
            if not self.__set_objective_to_none:
-                self.reset_parameter({"objective": "none"})
+                self.reset_parameter({"objective": "none"}).__set_objective_to_none = True
-                self.__set_objective_to_none = True
            grad, hess = fobj(self.__inner_predict(0), self.train_set)
            return self.__boost(grad, hess)
    def __boost(self, grad, hess):
        """
-        Boost the booster for one iteration, with customized gradient statistics.
+        Boost Booster for one iteration with customized gradient statistics.
-        Note: for multi-class task, the score is group by class_id first, then group by row_id
-              if you want to get i-th row score in j-th class, the access way is score[j*num_data+i]
+        Note: For multi-class task, the score is group by class_id first, then group by row_id.
-              and you should group grad and hess in this way as well
+              If you want to get i-th row score in j-th class, the access way is score[j * num_data + i]
+              and you should group grad and hess in this way as well.
        Parameters
        ----------
-        grad : 1d numpy or 1d list
+        grad : 1d numpy array or list
-            The first order of gradient.
+            The first order derivative (gradient).
        hess : 1d numpy or 1d list
-            The second order of gradient.
+            The second order derivative (Hessian).
        Returns
        -------
-        is_finished, bool
+        is_finished : bool
+            Whether the boost was successfully finished.
        """
        grad = list_to_1d_numpy(grad, name='gradient')
        hess = list_to_1d_numpy(hess, name='hessian')
@@ -1614,10 +1709,17 @@ class Booster(object):
        return is_finished.value == 1
    def rollback_one_iter(self):
-        """Rollback one iteration."""
+        """Rollback one iteration.
+        Returns
+        -------
+        self : Booster
+            Booster with rolled back one iteration.
+        """
        _safe_call(_LIB.LGBM_BoosterRollbackOneIter(
            self.handle))
        self.__is_predicted_cur_iter = [False for _ in range_(self.__num_dataset)]
+        return self
    def current_iteration(self):
        """Get the index of the current iteration.
@@ -1651,7 +1753,7 @@ class Booster(object):
        Returns
        -------
-        result: list
+        result : list
            List with evaluation results.
        """
        if not isinstance(data, Dataset):
@@ -1685,7 +1787,7 @@ class Booster(object):
        Returns
        -------
-        result: list
+        result : list
            List with evaluation results.
        """
        return self.__inner_eval(self.__train_data_name, 0, feval)
@@ -1704,7 +1806,7 @@ class Booster(object):
        Returns
        -------
-        result: list
+        result : list
            List with evaluation results.
        """
        return [item for i in range_(1, self.__num_dataset)
@@ -1721,8 +1823,13 @@ class Booster(object):
            Index of the iteration that should be saved.
            If None, if the best iteration exists, it is saved; otherwise, all iterations are saved.
            If <= 0, all iterations are saved.
-        start_iteration: int, optional (default=0)
+        start_iteration : int, optional (default=0)
            Start index of the iteration that should be saved.
+        Returns
+        -------
+        self : Booster
+            Returns self.
        """
        if num_iteration is None:
            num_iteration = self.best_iteration
@@ -1732,25 +1839,32 @@ class Booster(object):
            ctypes.c_int(num_iteration),
            c_str(filename)))
        _save_pandas_categorical(filename, self.pandas_categorical)
+        return self
    def shuffle_models(self):
        """Shuffle models.
+        Returns
+        -------
+        self : Booster
+            Booster with shuffled models.
        """
        _safe_call(_LIB.LGBM_BoosterShuffleModels(self.handle))
+        return self
    def model_from_string(self, model_str, verbose=True):
        """Load Booster from a string.
        Parameters
        ----------
-        model_str: string
+        model_str : string
            Model will be loaded from this string.
-        verbose: bool, optional (default=True)
+        verbose : bool, optional (default=True)
-            Set to False to disable log when loading model.
+            Whether to print messages while loading model.
        Returns
        -------
-        result: Booster
+        self : Booster
            Loaded Booster object.
        """
        if self.handle is not None:
@@ -1767,7 +1881,7 @@ class Booster(object):
            self.handle,
            ctypes.byref(out_num_class)))
        if verbose:
-            print('Finished loading model, total used %d iterations' % (int(out_num_iterations.value)))
+            print('Finished loading model, total used %d iterations' % int(out_num_iterations.value))
        self.__num_class = out_num_class.value
        return self
@@ -1780,12 +1894,12 @@ class Booster(object):
            Index of the iteration that should be saved.
            If None, if the best iteration exists, it is saved; otherwise, all iterations are saved.
            If <= 0, all iterations are saved.
-        start_iteration: int, optional (default=0)
+        start_iteration : int, optional (default=0)
            Start index of the iteration that should be saved.
        Returns
        -------
-        result: string
+        str_repr : string
            String representation of Booster.
        """
        if num_iteration is None:
@@ -1816,7 +1930,7 @@ class Booster(object):
        return string_buffer.value.decode()
    def dump_model(self, num_iteration=None, start_iteration=0):
-        """Dump Booster to json format.
+        """Dump Booster to JSON format.
        Parameters
        ----------
@@ -1824,13 +1938,13 @@ class Booster(object):
            Index of the iteration that should be dumped.
            If None, if the best iteration exists, it is dumped; otherwise, all iterations are dumped.
            If <= 0, all iterations are dumped.
-        start_iteration: int, optional (default=0)
+        start_iteration : int, optional (default=0)
            Start index of the iteration that should be dumped.
        Returns
        -------
        json_repr : dict
-            Json format of Booster.
+            JSON format of Booster.
        """
        if num_iteration is None:
            num_iteration = self.best_iteration
@@ -1990,8 +2104,7 @@ class Booster(object):
            importance_type_int = 1
        else:
            importance_type_int = -1
-        num_feature = self.num_feature()
+        result = np.zeros(self.num_feature(), dtype=np.float64)
-        result = np.array([0 for _ in range_(num_feature)], dtype=np.float64)
        _safe_call(_LIB.LGBM_BoosterFeatureImportance(
            self.handle,
            ctypes.c_int(iteration),
@@ -2004,7 +2117,7 @@ class Booster(object):
    def __inner_eval(self, data_name, data_idx, feval=None):
        """
-        Evaulate training or validation data
+        Evaluate training or validation data
        """
        if data_idx >= self.__num_dataset:
            raise ValueError("Data_idx should be smaller than number of dataset")
@@ -2102,7 +2215,7 @@ class Booster(object):
        -------
        value : string or None
            The attribute value.
-            Returns None if attribute do not exist.
+            Returns None if attribute does not exist.
        """
        return self.__attr.get(key, None)
@@ -2114,11 +2227,17 @@ class Booster(object):
        **kwargs
            The attributes to set.
            Setting a value to None deletes an attribute.
+        Returns
+        -------
+        self : Booster
+            Booster with set attribute.
        """
        for key, value in kwargs.items():
            if value is not None:
                if not isinstance(value, string_type):
-                    raise ValueError("Set attr only accepts strings")
+                    raise ValueError("Only string values are accepted")
                self.__attr[key] = value
            else:
                self.__attr.pop(key, None)
+        return self
--- a/python-package/lightgbm/engine.py
+++ b/python-package/lightgbm/engine.py
@@ -125,10 +125,7 @@ def train(params, train_set, num_boost_round=100,
    if not isinstance(train_set, Dataset):
        raise TypeError("Training only accepts Dataset object")
-    train_set._update_params(params)
+    train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature(categorical_feature)
-    train_set._set_predictor(predictor)
-    train_set.set_feature_name(feature_name)
-    train_set.set_categorical_feature(categorical_feature)
    is_valid_contain_train = False
    train_data_name = "training"
@@ -148,9 +145,7 @@ def train(params, train_set, num_boost_round=100,
                continue
            if not isinstance(valid_data, Dataset):
                raise TypeError("Traninig only accepts Dataset object")
-            valid_data._update_params(params)
+            reduced_valid_sets.append(valid_data._update_params(params).set_reference(train_set))
-            valid_data.set_reference(train_set)
-            reduced_valid_sets.append(valid_data)
            if valid_names is not None and len(valid_names) > i:
                name_valid_sets.append(valid_names[i])
            else:
@@ -230,8 +225,7 @@ def train(params, train_set, num_boost_round=100,
    for dataset_name, eval_name, score, _ in evaluation_result_list:
        booster.best_score[dataset_name][eval_name] = score
    if not keep_training_booster:
-        booster.model_from_string(booster.model_to_string(), False)
+        booster.model_from_string(booster.model_to_string(), False).free_dataset()
-        booster.free_dataset()
    return booster
@@ -421,10 +415,7 @@ def cv(params, train_set, num_boost_round=100,
        predictor = init_model._to_predictor()
    else:
        predictor = None
-    train_set._update_params(params)
+    train_set._update_params(params)._set_predictor(predictor).set_feature_name(feature_name).set_categorical_feature(categorical_feature)
-    train_set._set_predictor(predictor)
-    train_set.set_feature_name(feature_name)
-    train_set.set_categorical_feature(categorical_feature)
    if metrics is not None:
        params['metric'] = metrics

--- a/python-package/lightgbm/sklearn.py
+++ b/python-package/lightgbm/sklearn.py
@@ -23,24 +23,24 @@ def _objective_function_wrapper(func):
    Parameters
    ----------
-    func: callable
+    func : callable
        Expects a callable with signature ``func(y_true, y_pred)`` or ``func(y_true, y_pred, group):
-            y_true: array-like of shape = [n_samples]
+            y_true : array-like of shape = [n_samples]
                The target values.
-            y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
+            y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
                The predicted values.
-            group: array-like
+            group : array-like
                Group/query data, used for ranking task.
    Returns
    -------
-    new_func: callable
+    new_func : callable
        The new objective function as expected by ``lightgbm.engine.train``.
        The signature is ``new_func(preds, dataset)``:
-        preds: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
+        preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes]
            The predicted values.
-        dataset: ``dataset``
+        dataset : ``dataset``
            The training set from which the labels will be extracted using
            ``dataset.get_label()``.
    """
@@ -82,31 +82,31 @@ def _eval_function_wrapper(func):
    Parameters
    ----------
-    func: callable
+    func : callable
        Expects a callable with following functions:
            ``func(y_true, y_pred)``,
            ``func(y_true, y_pred, weight)``
         or ``func(y_true, y_pred, weight, group)``
            and return (eval_name->str, eval_result->float, is_bigger_better->Bool):
-            y_true: array-like of shape = [n_samples]
+            y_true : array-like of shape = [n_samples]
                The target values.
-            y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
+            y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
                The predicted values.
-            weight: array_like of shape = [n_samples]
+            weight : array_like of shape = [n_samples]
                The weight of samples.
-            group: array-like
+            group : array-like
                Group/query data, used for ranking task.
    Returns
    -------
-    new_func: callable
+    new_func : callable
        The new eval function as expected by ``lightgbm.engine.train``.
        The signature is ``new_func(preds, dataset)``:
-        preds: array-like of shape = [n_samples] or shape = [n_samples * n_classes]
+        preds : array-like of shape = [n_samples] or shape = [n_samples * n_classes]
            The predicted values.
-        dataset: ``dataset``
+        dataset : ``dataset``
            The training set from which the labels will be extracted using
            ``dataset.get_label()``.
    """
@@ -232,15 +232,15 @@ class LGBMModel(_LGBMModelBase):
        ``objective(y_true, y_pred) -> grad, hess`` or
        ``objective(y_true, y_pred, group) -> grad, hess``:
-            y_true: array-like of shape = [n_samples]
+            y_true : array-like of shape = [n_samples]
                The target values.
-            y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
+            y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
                The predicted values.
-            group: array-like
+            group : array-like
                Group/query data, used for ranking task.
-            grad: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
+            grad : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
                The value of the gradient for each sample point.
-            hess: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
+            hess : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class task)
                The value of the second derivative for each sample point.
        For multi-class task, the y_pred is group by class_id first, then group by row_id.
@@ -365,19 +365,19 @@ class LGBMModel(_LGBMModelBase):
        Returns (eval_name, eval_result, is_bigger_better) or
        list of (eval_name, eval_result, is_bigger_better)
-            y_true: array-like of shape = [n_samples]
+            y_true : array-like of shape = [n_samples]
                The target values.
-            y_pred: array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
+            y_pred : array-like of shape = [n_samples] or shape = [n_samples * n_classes] (for multi-class)
                The predicted values.
-            weight: array-like of shape = [n_samples]
+            weight : array-like of shape = [n_samples]
                The weight of samples.
-            group: array-like
+            group : array-like
                Group/query data, used for ranking task.
-            eval_name: string
+            eval_name : string
                The name of evaluation.
-            eval_result: float
+            eval_result : float
                The eval result.
-            is_bigger_better: bool
+            is_bigger_better : bool
                Is eval result bigger better, e.g. AUC is bigger_better.
        For multi-class task, the y_pred is group by class_id first, then group by row_id.
@@ -434,8 +434,7 @@ class LGBMModel(_LGBMModelBase):
        def _construct_dataset(X, y, sample_weight, init_score, group, params):
            ret = Dataset(X, label=y, weight=sample_weight, group=group, params=params)
-            ret.set_init_score(init_score)
+            return ret.set_init_score(init_score)
-            return ret
        train_set = _construct_dataset(X, y, sample_weight, init_score, group, params)