Commit 1b7643ba authored by wxchan's avatar wxchan Committed by Guolin Ke
Browse files

`_is_constructed` -> `handle is not None`; add FAQ for docs (#173)

* use handle is not None for _is_constructed

* sort imports; clean code; move FAQ to docs
parent f3f2f5a9
LightGBM FAQ
=======================
### Catalog
- [Python-package](FAQ.md#python-package)
### Python-package
- **Question 1**: I see error messages like this when installing from GitHub using `python setup.py install`.
```
error: Error: setup script specifies an absolute path:
/Users/Microsoft/LightGBM/python-package/lightgbm/../../lib_lightgbm.so
setup() arguments must *always* be /-separated paths relative to the
setup.py directory, *never* absolute paths.
```
- **Solution 1**: please check [this thread on stackoverflow](http://stackoverflow.com/questions/18085571/pip-install-error-setup-script-specifies-an-absolute-path).
- **Question 2**: I see error messages like `Cannot get/set label/weight/init_score/group/num_data/num_feature before construct dataset`, but I have already constructed a dataset with code like `train = lightgbm.Dataset(X_train, y_train)`, or error messages like `Cannot set predictor/reference/categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this.`.
- **Solution 2**: Because LightGBM constructs bin mappers to build trees, and train and valid Datasets within one Booster share the same bin mappers, categorical features, feature names, etc., the Dataset objects are constructed when constructing a Booster. And if you set free_raw_data=True (the default), the raw data (held in Python data structures) will be freed. So, if you want to:
+ get label (or weight/init_score/group) before constructing the dataset, it's the same as getting `self.label`
+ set label (or weight/init_score/group) before constructing the dataset, it's the same as `self.label=some_label_array`
+ get num_data (or num_feature) before constructing the dataset, you can get the data with `self.data`; then, if your data is a `numpy.ndarray`, use code like `self.data.shape`
+ set predictor (or reference/categorical feature) after constructing the dataset, you should set free_raw_data=False or init a Dataset object with the same raw data
...@@ -14,31 +14,20 @@ Note: Make sure you have `setuptools <https://pypi.python.org/pypi/setuptools>`_ ...@@ -14,31 +14,20 @@ Note: Make sure you have `setuptools <https://pypi.python.org/pypi/setuptools>`_
Examples Examples
-------- --------
- Refer also to the walk through examples in `python-guide Refer to the walk through examples in `python-guide folder <https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide>`__
folder <https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide>`__
Troubleshooting Troubleshooting
-------- --------
- **Trouble 1**: I see error messages like this when install from github using `python setup.py install`. Refer to `FAQ <https://github.com/Microsoft/LightGBM/tree/master/docs/FAQ.md>`__
error: Error: setup script specifies an absolute path:
/Users/Microsoft/LightGBM/python-package/lightgbm/../../lib_lightgbm.so
setup() arguments must *always* be /-separated paths relative to the
setup.py directory, *never* absolute paths.
- **Solution 1**: please check `here <http://stackoverflow.com/questions/18085571/pip-install-error-setup-script-specifies-an-absolute-path>`__.
Developments Developments
-------- --------
The code style of python package follows `pep-8 <https://www.python.org/dev/peps/pep-0008/>`__. If you would like to make a contribution and not familiar with pep-8, please check the pep-8 style guide first. Otherwise, you won't pass the check. You should be careful about: The code style of python package follows `pep8 <https://www.python.org/dev/peps/pep-0008/>`__. If you would like to make a contribution and not familiar with pep-8, please check the pep8 style guide first. Otherwise, you won't pass the check. You should be careful about:
- E1 Indentation (check pep-8 link above) - E1 Indentation (check pep8 link above)
- E202 whitespace before and after brackets - E202 whitespace before and after brackets
- E225 missing whitespace around operator - E225 missing whitespace around operator
- E226 missing whitespace around arithmetic operator - E226 missing whitespace around arithmetic operator
......
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, C0111, C0301 # pylint: disable = invalid-name, C0111, C0301
# pylint: disable = R0912, R0913, R0914, W0105, W0201, W0212 # pylint: disable = R0912, R0913, R0914, W0105, W0201, W0212
# pylint: disable = E1101
"""Wrapper c_api of LightGBM""" """Wrapper c_api of LightGBM"""
from __future__ import absolute_import from __future__ import absolute_import
...@@ -546,11 +545,8 @@ class Dataset(object): ...@@ -546,11 +545,8 @@ class Dataset(object):
def __del__(self): def __del__(self):
self._free_handle() self._free_handle()
def _is_constructed(self):
return self.handle is not None
def _free_handle(self): def _free_handle(self):
if self._is_constructed(): if self.handle is not None:
_safe_call(_LIB.LGBM_DatasetFree(self.handle)) _safe_call(_LIB.LGBM_DatasetFree(self.handle))
self.handle = None self.handle = None
...@@ -725,7 +721,7 @@ class Dataset(object): ...@@ -725,7 +721,7 @@ class Dataset(object):
def construct(self): def construct(self):
"""Lazy init""" """Lazy init"""
if not self._is_constructed(): if self.handle is None:
if self.reference is not None: if self.reference is not None:
if self.used_indices is None: if self.used_indices is None:
"""create valid""" """create valid"""
...@@ -829,8 +825,8 @@ class Dataset(object): ...@@ -829,8 +825,8 @@ class Dataset(object):
data: numpy array or list or None data: numpy array or list or None
The array ofdata to be set The array ofdata to be set
""" """
if not self._is_constructed(): if self.handle is None:
raise Exception("cannot set filed before construct dataset handle") raise Exception("Cannot set %s before construct dataset" % field_name)
if data is None: if data is None:
"""set to None""" """set to None"""
_safe_call(_LIB.LGBM_DatasetSetField( _safe_call(_LIB.LGBM_DatasetSetField(
...@@ -872,8 +868,8 @@ class Dataset(object): ...@@ -872,8 +868,8 @@ class Dataset(object):
info : array info : array
A numpy array of information of the data A numpy array of information of the data
""" """
if not self._is_constructed(): if self.handle is None:
raise Exception("cannot Get filed before construct dataset handle") raise Exception("Cannot get %s before construct dataset" % field_name)
tmp_out_len = ctypes.c_int() tmp_out_len = ctypes.c_int()
out_type = ctypes.c_int() out_type = ctypes.c_int()
ret = ctypes.POINTER(ctypes.c_void_p)() ret = ctypes.POINTER(ctypes.c_void_p)()
...@@ -910,8 +906,7 @@ class Dataset(object): ...@@ -910,8 +906,7 @@ class Dataset(object):
self.categorical_feature = categorical_feature self.categorical_feature = categorical_feature
self._free_handle() self._free_handle()
else: else:
raise LightGBMError("Cannot set categorical feature after freed raw data,\ raise LightGBMError("Cannot set categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
Set free_raw_data=False when construct Dataset to avoid this.")
def _set_predictor(self, predictor): def _set_predictor(self, predictor):
""" """
...@@ -924,7 +919,7 @@ class Dataset(object): ...@@ -924,7 +919,7 @@ class Dataset(object):
self._predictor = predictor self._predictor = predictor
self._free_handle() self._free_handle()
else: else:
raise LightGBMError("Cannot set predictor after freed raw data,Set free_raw_data=False when construct Dataset to avoid this.") raise LightGBMError("Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
def set_reference(self, reference): def set_reference(self, reference):
""" """
...@@ -944,8 +939,7 @@ class Dataset(object): ...@@ -944,8 +939,7 @@ class Dataset(object):
self.reference = reference self.reference = reference
self._free_handle() self._free_handle()
else: else:
raise LightGBMError("Cannot set reference after freed raw data,\ raise LightGBMError("Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
Set free_raw_data=False when construct Dataset to avoid this.")
def set_feature_name(self, feature_name): def set_feature_name(self, feature_name):
""" """
...@@ -957,7 +951,7 @@ class Dataset(object): ...@@ -957,7 +951,7 @@ class Dataset(object):
Feature names Feature names
""" """
self.feature_name = feature_name self.feature_name = feature_name
if self._is_constructed() and feature_name is not None: if self.handle is not None and feature_name is not None:
if len(feature_name) != self.num_feature(): if len(feature_name) != self.num_feature():
raise ValueError("Length of feature_name({}) and num_feature({}) don't match".format(len(feature_name), self.num_feature())) raise ValueError("Length of feature_name({}) and num_feature({}) don't match".format(len(feature_name), self.num_feature()))
c_feature_name = [c_str(name) for name in feature_name] c_feature_name = [c_str(name) for name in feature_name]
...@@ -976,7 +970,7 @@ class Dataset(object): ...@@ -976,7 +970,7 @@ class Dataset(object):
The label information to be set into Dataset The label information to be set into Dataset
""" """
self.label = label self.label = label
if self._is_constructed(): if self.handle is not None:
label = list_to_1d_numpy(label, name='label') label = list_to_1d_numpy(label, name='label')
self.set_field('label', label) self.set_field('label', label)
...@@ -990,7 +984,7 @@ class Dataset(object): ...@@ -990,7 +984,7 @@ class Dataset(object):
Weight for each data point Weight for each data point
""" """
self.weight = weight self.weight = weight
if self._is_constructed() and weight is not None: if self.handle is not None and weight is not None:
weight = list_to_1d_numpy(weight, name='weight') weight = list_to_1d_numpy(weight, name='weight')
self.set_field('weight', weight) self.set_field('weight', weight)
...@@ -1004,7 +998,7 @@ class Dataset(object): ...@@ -1004,7 +998,7 @@ class Dataset(object):
Init score for booster Init score for booster
""" """
self.init_score = init_score self.init_score = init_score
if self._is_constructed() and init_score is not None: if self.handle is not None and init_score is not None:
init_score = list_to_1d_numpy(init_score, name='init_score') init_score = list_to_1d_numpy(init_score, name='init_score')
self.set_field('init_score', init_score) self.set_field('init_score', init_score)
...@@ -1018,7 +1012,7 @@ class Dataset(object): ...@@ -1018,7 +1012,7 @@ class Dataset(object):
Group size of each group Group size of each group
""" """
self.group = group self.group = group
if self._is_constructed() and group is not None: if self.handle is not None and group is not None:
group = list_to_1d_numpy(group, np.int32, name='group') group = list_to_1d_numpy(group, np.int32, name='group')
self.set_field('group', group) self.set_field('group', group)
...@@ -1030,7 +1024,7 @@ class Dataset(object): ...@@ -1030,7 +1024,7 @@ class Dataset(object):
------- -------
label : array label : array
""" """
if self.label is None and self._is_constructed(): if self.label is None and self.handle is not None:
self.label = self.get_field('label') self.label = self.get_field('label')
return self.label return self.label
...@@ -1042,7 +1036,7 @@ class Dataset(object): ...@@ -1042,7 +1036,7 @@ class Dataset(object):
------- -------
weight : array weight : array
""" """
if self.weight is None and self._is_constructed(): if self.weight is None and self.handle is not None:
self.weight = self.get_field('weight') self.weight = self.get_field('weight')
return self.weight return self.weight
...@@ -1054,7 +1048,7 @@ class Dataset(object): ...@@ -1054,7 +1048,7 @@ class Dataset(object):
------- -------
init_score : array init_score : array
""" """
if self.init_score is None and self._is_constructed(): if self.init_score is None and self.handle is not None:
self.init_score = self.get_field('init_score') self.init_score = self.get_field('init_score')
return self.init_score return self.init_score
...@@ -1066,7 +1060,7 @@ class Dataset(object): ...@@ -1066,7 +1060,7 @@ class Dataset(object):
------- -------
init_score : array init_score : array
""" """
if self.group is None and self._is_constructed(): if self.group is None and self.handle is not None:
self.group = self.get_field('group') self.group = self.get_field('group')
if self.group is not None: if self.group is not None:
# group data from LightGBM is boundaries data, need to convert to group size # group data from LightGBM is boundaries data, need to convert to group size
...@@ -1084,13 +1078,13 @@ class Dataset(object): ...@@ -1084,13 +1078,13 @@ class Dataset(object):
------- -------
number of rows : int number of rows : int
""" """
if self._is_constructed(): if self.handle is not None:
ret = ctypes.c_int() ret = ctypes.c_int()
_safe_call(_LIB.LGBM_DatasetGetNumData(self.handle, _safe_call(_LIB.LGBM_DatasetGetNumData(self.handle,
ctypes.byref(ret))) ctypes.byref(ret)))
return ret.value return ret.value
else: else:
raise LightGBMError("Cannot call num_data before construct, please call it explicitly") raise LightGBMError("Cannot get num_data before construct dataset")
def num_feature(self): def num_feature(self):
""" """
...@@ -1100,13 +1094,13 @@ class Dataset(object): ...@@ -1100,13 +1094,13 @@ class Dataset(object):
------- -------
number of columns : int number of columns : int
""" """
if self._is_constructed(): if self.handle is not None:
ret = ctypes.c_int() ret = ctypes.c_int()
_safe_call(_LIB.LGBM_DatasetGetNumFeature(self.handle, _safe_call(_LIB.LGBM_DatasetGetNumFeature(self.handle,
ctypes.byref(ret))) ctypes.byref(ret)))
return ret.value return ret.value
else: else:
raise LightGBMError("Cannot call num_feature before construct, please call it explicitly") raise LightGBMError("Cannot get num_feature before construct dataset")
class Booster(object): class Booster(object):
......
# coding: utf-8 # coding: utf-8
# pylint: disable = invalid-name, W0105, C0301 # pylint: disable = invalid-name, W0105, C0301
from __future__ import absolute_import from __future__ import absolute_import
import collections import collections
...@@ -30,12 +31,12 @@ CallbackEnv = collections.namedtuple( ...@@ -30,12 +31,12 @@ CallbackEnv = collections.namedtuple(
def _format_eval_result(value, show_stdv=True): def _format_eval_result(value, show_stdv=True):
"""format metric string""" """format metric string"""
if len(value) == 4: if len(value) == 4:
return '%s\'s %s:%g' % (value[0], value[1], value[2]) return '%s\'s %s: %g' % (value[0], value[1], value[2])
elif len(value) == 5: elif len(value) == 5:
if show_stdv: if show_stdv:
return '%s\'s %s:%g+%g' % (value[0], value[1], value[2], value[4]) return '%s\'s %s: %g + %g' % (value[0], value[1], value[2], value[4])
else: else:
return '%s\'s %s:%g' % (value[0], value[1], value[2]) return '%s\'s %s: %g' % (value[0], value[1], value[2])
else: else:
raise ValueError("Wrong metric value") raise ValueError("Wrong metric value")
...@@ -58,12 +59,8 @@ def print_evaluation(period=1, show_stdv=True): ...@@ -58,12 +59,8 @@ def print_evaluation(period=1, show_stdv=True):
""" """
def callback(env): def callback(env):
"""internal function""" """internal function"""
if not env.evaluation_result_list or period <= 0: if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0:
return result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list])
if (env.iteration + 1) % period == 0:
result = '\t'.join(
[_format_eval_result(x, show_stdv) for x in env.evaluation_result_list]
)
print('[%d]\t%s' % (env.iteration + 1, result)) print('[%d]\t%s' % (env.iteration + 1, result))
callback.order = 10 callback.order = 10
return callback return callback
......
...@@ -5,9 +5,11 @@ from __future__ import absolute_import ...@@ -5,9 +5,11 @@ from __future__ import absolute_import
import collections import collections
from operator import attrgetter from operator import attrgetter
import numpy as np import numpy as np
from .basic import LightGBMError, _InnerPredictor, Dataset, Booster, is_str
from . import callback from . import callback
from .basic import Booster, Dataset, LightGBMError, _InnerPredictor, is_str
def train(params, train_set, num_boost_round=100, def train(params, train_set, num_boost_round=100,
...@@ -214,6 +216,7 @@ class CVBooster(object): ...@@ -214,6 +216,7 @@ class CVBooster(object):
return ret return ret
return handlerFunction return handlerFunction
try: try:
from sklearn.model_selection import StratifiedKFold from sklearn.model_selection import StratifiedKFold
SKLEARN_StratifiedKFold = True SKLEARN_StratifiedKFold = True
......
...@@ -2,11 +2,14 @@ ...@@ -2,11 +2,14 @@
# pylint: disable = invalid-name, W0105, C0111, C0301 # pylint: disable = invalid-name, W0105, C0111, C0301
"""Scikit-Learn Wrapper interface for LightGBM.""" """Scikit-Learn Wrapper interface for LightGBM."""
from __future__ import absolute_import from __future__ import absolute_import
import inspect import inspect
import numpy as np import numpy as np
from .basic import LightGBMError, Dataset, IS_PY3
from .basic import IS_PY3, Dataset, LightGBMError
from .engine import train from .engine import train
'''sklearn''' '''sklearn'''
try: try:
from sklearn.base import BaseEstimator from sklearn.base import BaseEstimator
......
...@@ -2,9 +2,11 @@ ...@@ -2,9 +2,11 @@
# pylint: disable=invalid-name, exec-used # pylint: disable=invalid-name, exec-used
"""Setup lightgbm package.""" """Setup lightgbm package."""
from __future__ import absolute_import from __future__ import absolute_import
import sys
import os import os
from setuptools import setup, find_packages import sys
from setuptools import find_packages, setup
sys.path.insert(0, '.') sys.path.insert(0, '.')
......
...@@ -112,7 +112,7 @@ class TestEngine(unittest.TestCase): ...@@ -112,7 +112,7 @@ class TestEngine(unittest.TestCase):
def test_cv(self): def test_cv(self):
lgb_train, _ = test_template(return_data=True) lgb_train, _ = test_template(return_data=True)
lgb.cv({'verbose': 0}, lgb_train, num_boost_round=20, nfold=5, lgb.cv({'verbose': -1}, lgb_train, num_boost_round=20, nfold=5,
metrics='l1', verbose_eval=False, metrics='l1', verbose_eval=False,
callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)]) callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment