Commit 1b7643ba authored by wxchan's avatar wxchan Committed by Guolin Ke
Browse files

`_is_constructed` -> `handle is not None`; add FAQ for docs (#173)

* use handle is not None for _is_constructed

* sort imports; clean code; move FAQ to docs
parent f3f2f5a9
LightGBM FAQ
=======================
### Catalog
- [Python-package](FAQ.md#python-package)
### Python-package
- **Question 1**: I see error messages like this when installing from GitHub using `python setup.py install`.
```
error: Error: setup script specifies an absolute path:
/Users/Microsoft/LightGBM/python-package/lightgbm/../../lib_lightgbm.so
setup() arguments must *always* be /-separated paths relative to the
setup.py directory, *never* absolute paths.
```
- **Solution 1**: please check [this thread on stackoverflow](http://stackoverflow.com/questions/18085571/pip-install-error-setup-script-specifies-an-absolute-path).
- **Question 2**: I see error messages like `Cannot get/set label/weight/init_score/group/num_data/num_feature before construct dataset`, but I have already constructed a dataset with code like `train = lightgbm.Dataset(X_train, y_train)`, or error messages like `Cannot set predictor/reference/categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this.`.
- **Solution 2**: Because LightGBM constructs bin mappers to build trees, and the train and valid Datasets within one Booster share the same bin mappers, categorical features, feature names, etc., the Dataset objects are constructed when constructing a Booster. And if you set free_raw_data=True (the default), the raw data (in Python data structures) will be freed. So, if you want to:
+ get label (or weight/init_score/group) before constructing the dataset: it is the same as getting `self.label`
+ set label (or weight/init_score/group) before constructing the dataset: it is the same as `self.label=some_label_array`
+ get num_data (or num_feature) before constructing the dataset: you can get the data with `self.data`; then, if your data is a `numpy.ndarray`, use code like `self.data.shape`
+ set predictor (or reference/categorical feature) after constructing the dataset: you should set free_raw_data=False or init a Dataset object with the same raw data
......@@ -14,31 +14,20 @@ Note: Make sure you have `setuptools <https://pypi.python.org/pypi/setuptools>`_
Examples
--------
Refer to the walk through examples in `python-guide folder <https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide>`__
Troubleshooting
--------
- **Trouble 1**: I see error messages like this when install from github using `python setup.py install`.
error: Error: setup script specifies an absolute path:
/Users/Microsoft/LightGBM/python-package/lightgbm/../../lib_lightgbm.so
setup() arguments must *always* be /-separated paths relative to the
setup.py directory, *never* absolute paths.
- **Solution 1**: please check `here <http://stackoverflow.com/questions/18085571/pip-install-error-setup-script-specifies-an-absolute-path>`__.
Refer to `FAQ <https://github.com/Microsoft/LightGBM/tree/master/docs/FAQ.md>`__
Developments
--------
The code style of python package follows `pep8 <https://www.python.org/dev/peps/pep-0008/>`__. If you would like to make a contribution and are not familiar with pep8, please check the pep8 style guide first. Otherwise, you won't pass the check. You should be careful about:
- E1 Indentation (check pep8 link above)
- E202 whitespace before and after brackets
- E225 missing whitespace around operator
- E226 missing whitespace around arithmetic operator
......
# coding: utf-8
# pylint: disable = invalid-name, C0111, C0301
# pylint: disable = R0912, R0913, R0914, W0105, W0201, W0212
# pylint: disable = E1101
"""Wrapper c_api of LightGBM"""
from __future__ import absolute_import
......@@ -546,11 +545,8 @@ class Dataset(object):
def __del__(self):
    # Release the native dataset handle when this object is garbage
    # collected; _free_handle is safe to call even if construct() never ran.
    self._free_handle()
def _is_constructed(self):
return self.handle is not None
def _free_handle(self):
    """Free the underlying LightGBM dataset handle, if it exists.

    The scraped diff left both the old guard (``self._is_constructed()``)
    and the new guard (``self.handle is not None``) stacked on top of each
    other; only the single post-commit condition is kept here.
    """
    # Reset the handle afterwards so repeated calls (e.g. from __del__)
    # are harmless no-ops.
    if self.handle is not None:
        _safe_call(_LIB.LGBM_DatasetFree(self.handle))
        self.handle = None
......@@ -725,7 +721,7 @@ class Dataset(object):
def construct(self):
"""Lazy init"""
if not self._is_constructed():
if self.handle is None:
if self.reference is not None:
if self.used_indices is None:
"""create valid"""
......@@ -829,8 +825,8 @@ class Dataset(object):
data: numpy array or list or None
The array ofdata to be set
"""
if not self._is_constructed():
raise Exception("cannot set filed before construct dataset handle")
if self.handle is None:
raise Exception("Cannot set %s before construct dataset" % field_name)
if data is None:
"""set to None"""
_safe_call(_LIB.LGBM_DatasetSetField(
......@@ -872,8 +868,8 @@ class Dataset(object):
info : array
A numpy array of information of the data
"""
if not self._is_constructed():
raise Exception("cannot Get filed before construct dataset handle")
if self.handle is None:
raise Exception("Cannot get %s before construct dataset" % field_name)
tmp_out_len = ctypes.c_int()
out_type = ctypes.c_int()
ret = ctypes.POINTER(ctypes.c_void_p)()
......@@ -910,8 +906,7 @@ class Dataset(object):
self.categorical_feature = categorical_feature
self._free_handle()
else:
raise LightGBMError("Cannot set categorical feature after freed raw data,\
Set free_raw_data=False when construct Dataset to avoid this.")
raise LightGBMError("Cannot set categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
def _set_predictor(self, predictor):
"""
......@@ -924,7 +919,7 @@ class Dataset(object):
self._predictor = predictor
self._free_handle()
else:
raise LightGBMError("Cannot set predictor after freed raw data,Set free_raw_data=False when construct Dataset to avoid this.")
raise LightGBMError("Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
def set_reference(self, reference):
"""
......@@ -944,8 +939,7 @@ class Dataset(object):
self.reference = reference
self._free_handle()
else:
raise LightGBMError("Cannot set reference after freed raw data,\
Set free_raw_data=False when construct Dataset to avoid this.")
raise LightGBMError("Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
def set_feature_name(self, feature_name):
"""
......@@ -957,7 +951,7 @@ class Dataset(object):
Feature names
"""
self.feature_name = feature_name
if self._is_constructed() and feature_name is not None:
if self.handle is not None and feature_name is not None:
if len(feature_name) != self.num_feature():
raise ValueError("Length of feature_name({}) and num_feature({}) don't match".format(len(feature_name), self.num_feature()))
c_feature_name = [c_str(name) for name in feature_name]
......@@ -976,7 +970,7 @@ class Dataset(object):
The label information to be set into Dataset
"""
self.label = label
if self._is_constructed():
if self.handle is not None:
label = list_to_1d_numpy(label, name='label')
self.set_field('label', label)
......@@ -990,7 +984,7 @@ class Dataset(object):
Weight for each data point
"""
self.weight = weight
if self._is_constructed() and weight is not None:
if self.handle is not None and weight is not None:
weight = list_to_1d_numpy(weight, name='weight')
self.set_field('weight', weight)
......@@ -1004,7 +998,7 @@ class Dataset(object):
Init score for booster
"""
self.init_score = init_score
if self._is_constructed() and init_score is not None:
if self.handle is not None and init_score is not None:
init_score = list_to_1d_numpy(init_score, name='init_score')
self.set_field('init_score', init_score)
......@@ -1018,7 +1012,7 @@ class Dataset(object):
Group size of each group
"""
self.group = group
if self._is_constructed() and group is not None:
if self.handle is not None and group is not None:
group = list_to_1d_numpy(group, np.int32, name='group')
self.set_field('group', group)
......@@ -1030,7 +1024,7 @@ class Dataset(object):
-------
label : array
"""
if self.label is None and self._is_constructed():
if self.label is None and self.handle is not None:
self.label = self.get_field('label')
return self.label
......@@ -1042,7 +1036,7 @@ class Dataset(object):
-------
weight : array
"""
if self.weight is None and self._is_constructed():
if self.weight is None and self.handle is not None:
self.weight = self.get_field('weight')
return self.weight
......@@ -1054,7 +1048,7 @@ class Dataset(object):
-------
init_score : array
"""
if self.init_score is None and self._is_constructed():
if self.init_score is None and self.handle is not None:
self.init_score = self.get_field('init_score')
return self.init_score
......@@ -1066,7 +1060,7 @@ class Dataset(object):
-------
init_score : array
"""
if self.group is None and self._is_constructed():
if self.group is None and self.handle is not None:
self.group = self.get_field('group')
if self.group is not None:
# group data from LightGBM is boundaries data, need to convert to group size
......@@ -1084,13 +1078,13 @@ class Dataset(object):
-------
number of rows : int
"""
if self._is_constructed():
if self.handle is not None:
ret = ctypes.c_int()
_safe_call(_LIB.LGBM_DatasetGetNumData(self.handle,
ctypes.byref(ret)))
return ret.value
else:
raise LightGBMError("Cannot call num_data before construct, please call it explicitly")
raise LightGBMError("Cannot get num_data before construct dataset")
def num_feature(self):
"""
......@@ -1100,13 +1094,13 @@ class Dataset(object):
-------
number of columns : int
"""
if self._is_constructed():
if self.handle is not None:
ret = ctypes.c_int()
_safe_call(_LIB.LGBM_DatasetGetNumFeature(self.handle,
ctypes.byref(ret)))
return ret.value
else:
raise LightGBMError("Cannot call num_feature before construct, please call it explicitly")
raise LightGBMError("Cannot get num_feature before construct dataset")
class Booster(object):
......
# coding: utf-8
# pylint: disable = invalid-name, W0105, C0301
from __future__ import absolute_import
import collections
......@@ -30,12 +31,12 @@ CallbackEnv = collections.namedtuple(
def _format_eval_result(value, show_stdv=True):
"""format metric string"""
if len(value) == 4:
return '%s\'s %s:%g' % (value[0], value[1], value[2])
return '%s\'s %s: %g' % (value[0], value[1], value[2])
elif len(value) == 5:
if show_stdv:
return '%s\'s %s:%g+%g' % (value[0], value[1], value[2], value[4])
return '%s\'s %s: %g + %g' % (value[0], value[1], value[2], value[4])
else:
return '%s\'s %s:%g' % (value[0], value[1], value[2])
return '%s\'s %s: %g' % (value[0], value[1], value[2])
else:
raise ValueError("Wrong metric value")
......@@ -58,12 +59,8 @@ def print_evaluation(period=1, show_stdv=True):
"""
def callback(env):
"""internal function"""
if not env.evaluation_result_list or period <= 0:
return
if (env.iteration + 1) % period == 0:
result = '\t'.join(
[_format_eval_result(x, show_stdv) for x in env.evaluation_result_list]
)
if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0:
result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list])
print('[%d]\t%s' % (env.iteration + 1, result))
callback.order = 10
return callback
......
......@@ -5,9 +5,11 @@ from __future__ import absolute_import
import collections
from operator import attrgetter
import numpy as np
from .basic import LightGBMError, _InnerPredictor, Dataset, Booster, is_str
from . import callback
from .basic import Booster, Dataset, LightGBMError, _InnerPredictor, is_str
def train(params, train_set, num_boost_round=100,
......@@ -214,6 +216,7 @@ class CVBooster(object):
return ret
return handlerFunction
try:
from sklearn.model_selection import StratifiedKFold
SKLEARN_StratifiedKFold = True
......
......@@ -2,11 +2,14 @@
# pylint: disable = invalid-name, W0105, C0111, C0301
"""Scikit-Learn Wrapper interface for LightGBM."""
from __future__ import absolute_import
import inspect
import numpy as np
from .basic import LightGBMError, Dataset, IS_PY3
from .basic import IS_PY3, Dataset, LightGBMError
from .engine import train
'''sklearn'''
try:
from sklearn.base import BaseEstimator
......
......@@ -2,9 +2,11 @@
# pylint: disable=invalid-name, exec-used
"""Setup lightgbm package."""
from __future__ import absolute_import
import sys
import os
from setuptools import setup, find_packages
import sys
from setuptools import find_packages, setup
sys.path.insert(0, '.')
......
......@@ -112,7 +112,7 @@ class TestEngine(unittest.TestCase):
def test_cv(self):
    # Smoke test: cross-validation with an L1 metric and a decaying
    # learning-rate callback should run without raising.
    # The scraped diff duplicated the lgb.cv opening line (old residue used
    # 'verbose': 0); only the post-commit 'verbose': -1 call is kept.
    lgb_train, _ = test_template(return_data=True)
    lgb.cv({'verbose': -1}, lgb_train, num_boost_round=20, nfold=5,
           metrics='l1', verbose_eval=False,
           callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment