Commit 1b7643ba authored by wxchan's avatar wxchan Committed by Guolin Ke
Browse files

`_is_constructed` -> `handle is not None`; add FAQ for docs (#173)

* use handle is not None for _is_constructed

* sort imports; clean code; move FAQ to docs
parent f3f2f5a9
LightGBM FAQ
=======================
### Catalog
- [Python-package](FAQ.md#python-package)
### Python-package
- **Question 1**: I see error messages like this when installing from GitHub using `python setup.py install`.
```
error: Error: setup script specifies an absolute path:
/Users/Microsoft/LightGBM/python-package/lightgbm/../../lib_lightgbm.so
setup() arguments must *always* be /-separated paths relative to the
setup.py directory, *never* absolute paths.
```
- **Solution 1**: please check [this thread on stackoverflow](http://stackoverflow.com/questions/18085571/pip-install-error-setup-script-specifies-an-absolute-path).
- **Question 2**: I see error messages like `Cannot get/set label/weight/init_score/group/num_data/num_feature before construct dataset`, but I have already constructed a dataset with code like `train = lightgbm.Dataset(X_train, y_train)`, or error messages like `Cannot set predictor/reference/categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this.`.
- **Solution 2**: Because LightGBM constructs bin mappers to build trees, and the train and valid Datasets within one Booster share the same bin mappers, categorical features, feature names, etc., the Dataset objects are constructed when constructing a Booster. And if you set free_raw_data=True (the default), the raw data (in Python data structures) will be freed. So, if you want to:
+ get label (or weight/init_score/group) before constructing the dataset: it is the same as getting `self.label`
+ set label (or weight/init_score/group) before constructing the dataset: it is the same as `self.label=some_label_array`
+ get num_data (or num_feature) before constructing the dataset: you can get the data with `self.data`; then, if your data is a `numpy.ndarray`, use code like `self.data.shape`
+ set predictor (or reference/categorical feature) after constructing the dataset: you should set free_raw_data=False or init a Dataset object with the same raw data
......@@ -14,31 +14,20 @@ Note: Make sure you have `setuptools <https://pypi.python.org/pypi/setuptools>`_
Examples
--------
Refer to the walk through examples in `python-guide folder <https://github.com/Microsoft/LightGBM/tree/master/examples/python-guide>`__
Troubleshooting
--------
- **Trouble 1**: I see error messages like this when install from github using `python setup.py install`.
error: Error: setup script specifies an absolute path:
/Users/Microsoft/LightGBM/python-package/lightgbm/../../lib_lightgbm.so
setup() arguments must *always* be /-separated paths relative to the
setup.py directory, *never* absolute paths.
- **Solution 1**: please check `here <http://stackoverflow.com/questions/18085571/pip-install-error-setup-script-specifies-an-absolute-path>`__.
Refer to `FAQ <https://github.com/Microsoft/LightGBM/tree/master/docs/FAQ.md>`__
Developments
--------
The code style of python package follows `pep8 <https://www.python.org/dev/peps/pep-0008/>`__. If you would like to make a contribution and are not familiar with pep8, please check the pep8 style guide first. Otherwise, you won't pass the check. You should be careful about:
- E1 Indentation (check pep8 link above)
- E202 whitespace before and after brackets
- E225 missing whitespace around operator
- E226 missing whitespace around arithmetic operator
......
# coding: utf-8
# pylint: disable = invalid-name, C0111, C0301
# pylint: disable = R0912, R0913, R0914, W0105, W0201, W0212
# pylint: disable = E1101
"""Wrapper c_api of LightGBM"""
from __future__ import absolute_import
......@@ -546,11 +545,8 @@ class Dataset(object):
def __del__(self):
    # Release the native dataset handle when this object is garbage
    # collected; _free_handle is safe to call even if construct() never ran.
    self._free_handle()
def _is_constructed(self):
return self.handle is not None
def _free_handle(self):
    """Free the underlying LightGBM dataset handle, if it exists.

    The scraped diff left both the old guard (``self._is_constructed()``)
    and the new guard (``self.handle is not None``) stacked on top of each
    other; only the single post-commit condition is kept here.
    """
    # Reset the handle afterwards so repeated calls (e.g. from __del__)
    # are harmless no-ops.
    if self.handle is not None:
        _safe_call(_LIB.LGBM_DatasetFree(self.handle))
        self.handle = None
......@@ -725,7 +721,7 @@ class Dataset(object):
def construct(self):
"""Lazy init"""
if not self._is_constructed():
if self.handle is None:
if self.reference is not None:
if self.used_indices is None:
"""create valid"""
......@@ -829,8 +825,8 @@ class Dataset(object):
data: numpy array or list or None
The array ofdata to be set
"""
if not self._is_constructed():
raise Exception("cannot set filed before construct dataset handle")
if self.handle is None:
raise Exception("Cannot set %s before construct dataset" % field_name)
if data is None:
"""set to None"""
_safe_call(_LIB.LGBM_DatasetSetField(
......@@ -872,8 +868,8 @@ class Dataset(object):
info : array
A numpy array of information of the data
"""
if not self._is_constructed():
raise Exception("cannot Get filed before construct dataset handle")
if self.handle is None:
raise Exception("Cannot get %s before construct dataset" % field_name)
tmp_out_len = ctypes.c_int()
out_type = ctypes.c_int()
ret = ctypes.POINTER(ctypes.c_void_p)()
......@@ -910,8 +906,7 @@ class Dataset(object):
self.categorical_feature = categorical_feature
self._free_handle()
else:
raise LightGBMError("Cannot set categorical feature after freed raw data,\
Set free_raw_data=False when construct Dataset to avoid this.")
raise LightGBMError("Cannot set categorical feature after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
def _set_predictor(self, predictor):
"""
......@@ -924,7 +919,7 @@ class Dataset(object):
self._predictor = predictor
self._free_handle()
else:
raise LightGBMError("Cannot set predictor after freed raw data,Set free_raw_data=False when construct Dataset to avoid this.")
raise LightGBMError("Cannot set predictor after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
def set_reference(self, reference):
"""
......@@ -944,8 +939,7 @@ class Dataset(object):
self.reference = reference
self._free_handle()
else:
raise LightGBMError("Cannot set reference after freed raw data,\
Set free_raw_data=False when construct Dataset to avoid this.")
raise LightGBMError("Cannot set reference after freed raw data, set free_raw_data=False when construct Dataset to avoid this.")
def set_feature_name(self, feature_name):
"""
......@@ -957,7 +951,7 @@ class Dataset(object):
Feature names
"""
self.feature_name = feature_name
if self._is_constructed() and feature_name is not None:
if self.handle is not None and feature_name is not None:
if len(feature_name) != self.num_feature():
raise ValueError("Length of feature_name({}) and num_feature({}) don't match".format(len(feature_name), self.num_feature()))
c_feature_name = [c_str(name) for name in feature_name]
......@@ -976,7 +970,7 @@ class Dataset(object):
The label information to be set into Dataset
"""
self.label = label
if self._is_constructed():
if self.handle is not None:
label = list_to_1d_numpy(label, name='label')
self.set_field('label', label)
......@@ -990,7 +984,7 @@ class Dataset(object):
Weight for each data point
"""
self.weight = weight
if self._is_constructed() and weight is not None:
if self.handle is not None and weight is not None:
weight = list_to_1d_numpy(weight, name='weight')
self.set_field('weight', weight)
......@@ -1004,7 +998,7 @@ class Dataset(object):
Init score for booster
"""
self.init_score = init_score
if self._is_constructed() and init_score is not None:
if self.handle is not None and init_score is not None:
init_score = list_to_1d_numpy(init_score, name='init_score')
self.set_field('init_score', init_score)
......@@ -1018,7 +1012,7 @@ class Dataset(object):
Group size of each group
"""
self.group = group
if self._is_constructed() and group is not None:
if self.handle is not None and group is not None:
group = list_to_1d_numpy(group, np.int32, name='group')
self.set_field('group', group)
......@@ -1030,7 +1024,7 @@ class Dataset(object):
-------
label : array
"""
if self.label is None and self._is_constructed():
if self.label is None and self.handle is not None:
self.label = self.get_field('label')
return self.label
......@@ -1042,7 +1036,7 @@ class Dataset(object):
-------
weight : array
"""
if self.weight is None and self._is_constructed():
if self.weight is None and self.handle is not None:
self.weight = self.get_field('weight')
return self.weight
......@@ -1054,7 +1048,7 @@ class Dataset(object):
-------
init_score : array
"""
if self.init_score is None and self._is_constructed():
if self.init_score is None and self.handle is not None:
self.init_score = self.get_field('init_score')
return self.init_score
......@@ -1066,7 +1060,7 @@ class Dataset(object):
-------
init_score : array
"""
if self.group is None and self._is_constructed():
if self.group is None and self.handle is not None:
self.group = self.get_field('group')
if self.group is not None:
# group data from LightGBM is boundaries data, need to convert to group size
......@@ -1084,13 +1078,13 @@ class Dataset(object):
-------
number of rows : int
"""
if self._is_constructed():
if self.handle is not None:
ret = ctypes.c_int()
_safe_call(_LIB.LGBM_DatasetGetNumData(self.handle,
ctypes.byref(ret)))
return ret.value
else:
raise LightGBMError("Cannot call num_data before construct, please call it explicitly")
raise LightGBMError("Cannot get num_data before construct dataset")
def num_feature(self):
"""
......@@ -1100,13 +1094,13 @@ class Dataset(object):
-------
number of columns : int
"""
if self._is_constructed():
if self.handle is not None:
ret = ctypes.c_int()
_safe_call(_LIB.LGBM_DatasetGetNumFeature(self.handle,
ctypes.byref(ret)))
return ret.value
else:
raise LightGBMError("Cannot call num_feature before construct, please call it explicitly")
raise LightGBMError("Cannot get num_feature before construct dataset")
class Booster(object):
......
# coding: utf-8
# pylint: disable = invalid-name, W0105, C0301
from __future__ import absolute_import
import collections
......@@ -30,12 +31,12 @@ CallbackEnv = collections.namedtuple(
def _format_eval_result(value, show_stdv=True):
"""format metric string"""
if len(value) == 4:
return '%s\'s %s:%g' % (value[0], value[1], value[2])
return '%s\'s %s: %g' % (value[0], value[1], value[2])
elif len(value) == 5:
if show_stdv:
return '%s\'s %s:%g+%g' % (value[0], value[1], value[2], value[4])
return '%s\'s %s: %g + %g' % (value[0], value[1], value[2], value[4])
else:
return '%s\'s %s:%g' % (value[0], value[1], value[2])
return '%s\'s %s: %g' % (value[0], value[1], value[2])
else:
raise ValueError("Wrong metric value")
......@@ -58,12 +59,8 @@ def print_evaluation(period=1, show_stdv=True):
"""
def callback(env):
"""internal function"""
if not env.evaluation_result_list or period <= 0:
return
if (env.iteration + 1) % period == 0:
result = '\t'.join(
[_format_eval_result(x, show_stdv) for x in env.evaluation_result_list]
)
if period > 0 and env.evaluation_result_list and (env.iteration + 1) % period == 0:
result = '\t'.join([_format_eval_result(x, show_stdv) for x in env.evaluation_result_list])
print('[%d]\t%s' % (env.iteration + 1, result))
callback.order = 10
return callback
......
......@@ -5,9 +5,11 @@ from __future__ import absolute_import
import collections
from operator import attrgetter
import numpy as np
from .basic import LightGBMError, _InnerPredictor, Dataset, Booster, is_str
from . import callback
from .basic import Booster, Dataset, LightGBMError, _InnerPredictor, is_str
def train(params, train_set, num_boost_round=100,
......@@ -214,6 +216,7 @@ class CVBooster(object):
return ret
return handlerFunction
try:
from sklearn.model_selection import StratifiedKFold
SKLEARN_StratifiedKFold = True
......
......@@ -2,11 +2,14 @@
# pylint: disable = invalid-name, W0105, C0111, C0301
"""Scikit-Learn Wrapper interface for LightGBM."""
from __future__ import absolute_import
import inspect
import numpy as np
from .basic import LightGBMError, Dataset, IS_PY3
from .basic import IS_PY3, Dataset, LightGBMError
from .engine import train
'''sklearn'''
try:
from sklearn.base import BaseEstimator
......
......@@ -2,9 +2,11 @@
# pylint: disable=invalid-name, exec-used
"""Setup lightgbm package."""
from __future__ import absolute_import
import sys
import os
from setuptools import setup, find_packages
import sys
from setuptools import find_packages, setup
sys.path.insert(0, '.')
......
......@@ -112,7 +112,7 @@ class TestEngine(unittest.TestCase):
def test_cv(self):
    # Smoke test: cross-validation with an L1 metric and a decaying
    # learning-rate callback should run without raising.
    # The scraped diff duplicated the lgb.cv opening line (old residue used
    # 'verbose': 0); only the post-commit 'verbose': -1 call is kept.
    lgb_train, _ = test_template(return_data=True)
    lgb.cv({'verbose': -1}, lgb_train, num_boost_round=20, nfold=5,
           metrics='l1', verbose_eval=False,
           callbacks=[lgb.reset_parameter(learning_rate=lambda i: 0.1 - 0.001 * i)])
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment